[RISCV] Select 5 bit immediate for VSETIVLI during isel rather than peepholing in the custom inserter.

This adds a special operand type that is allowed to be either
an immediate or a register. Giving it a unique operand type means
the machine verifier will ignore it.
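
To illustrate the idea, here is a minimal C++ sketch (assuming the usual LLVM CodeGen and RISC-V backend headers are included; the helper name emitVSetVL is invented for this sketch) of how a consumer dispatches on an operand that may be either an immediate or a register. It mirrors the BuildVSETVLI lambda in the custom inserter change further down:

// Hypothetical helper (name invented); mirrors the custom inserter below.
// If the AVL operand was selected as a 5-bit immediate we can emit
// PseudoVSETIVLI directly; otherwise it is a register and we emit
// PseudoVSETVLI. The caller appends the vtype immediate to the returned
// builder, just as the real code does.
static MachineInstrBuilder emitVSetVL(MachineBasicBlock &MBB, MachineInstr &MI,
                                      const DebugLoc &DL,
                                      const TargetInstrInfo &TII,
                                      MachineRegisterInfo &MRI,
                                      const MachineOperand &VLOp) {
  Register DestReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
  if (VLOp.isImm())
    return BuildMI(MBB, MI, DL, TII.get(RISCV::PseudoVSETIVLI))
        .addReg(DestReg, RegState::Define | RegState::Dead)
        .addImm(VLOp.getImm());
  return BuildMI(MBB, MI, DL, TII.get(RISCV::PseudoVSETVLI))
      .addReg(DestReg, RegState::Define | RegState::Dead)
      .addReg(VLOp.getReg());
}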

This perturbs a lot of tests, but mostly the changes are just slightly
different instruction orders. Some min/max reduction tests did regress:
we now spill vector registers where we weren't spilling before.

Reviewed By: khchen

Differential Revision: https://reviews.llvm.org/D101246
Craig Topper 2021-04-27 12:48:44 -07:00
parent a495b672b7
commit ce09dd54e6
30 changed files with 1015 additions and 1062 deletions


@ -117,7 +117,8 @@ enum OperandType : unsigned {
OPERAND_SIMM12,
OPERAND_UIMM20,
OPERAND_UIMMLOG2XLEN,
OPERAND_LAST_RISCV_IMM = OPERAND_UIMMLOG2XLEN
OPERAND_LAST_RISCV_IMM = OPERAND_UIMMLOG2XLEN,
OPERAND_AVL,
};
} // namespace RISCVOp
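
A rough sketch of why OPERAND_AVL is placed after OPERAND_LAST_RISCV_IMM (this is not the verifier's actual code; the predicate name is invented and it assumes the RISCVOp enum above is visible): immediate range checking is keyed off the operand kinds up to OPERAND_LAST_RISCV_IMM, so the check is skipped for an AVL operand and it may legally carry either a register or an immediate.

// Illustration only (not the real verifier): operand kinds up to
// OPERAND_LAST_RISCV_IMM get generic immediate range checks. OPERAND_AVL is
// declared after that marker, so this predicate is false for it and no
// immediate check is applied to an AVL operand.
static bool hasGenericImmCheck(unsigned OpType) {
  return OpType <= RISCVOp::OPERAND_LAST_RISCV_IMM;
}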


@ -1340,17 +1340,13 @@ bool RISCVDAGToDAGISel::MatchSLLIUW(SDNode *N) const {
return (VC1 >> VC2) == UINT64_C(0xFFFFFFFF);
}
// X0 has special meaning for vsetvl/vsetvli.
// rd | rs1 | AVL value | Effect on vl
//--------------------------------------------------------------
// !X0 | X0 | VLMAX | Set vl to VLMAX
// X0 | X0 | Value in vl | Keep current vl, just change vtype.
// Select VL as a 5 bit immediate or a value that will become a register. This
// allows us to choose between VSETIVLI or VSETVLI later.
bool RISCVDAGToDAGISel::selectVLOp(SDValue N, SDValue &VL) {
// If the VL value is a constant 0, manually select it to an ADDI with 0
// immediate to prevent the default selection path from matching it to X0.
auto *C = dyn_cast<ConstantSDNode>(N);
if (C && C->isNullValue())
VL = SDValue(selectImm(CurDAG, SDLoc(N), 0, Subtarget->getXLenVT()), 0);
if (C && isUInt<5>(C->getZExtValue()))
VL = CurDAG->getTargetConstant(C->getZExtValue(), SDLoc(N),
N->getValueType(0));
else
VL = N;
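
As a standalone illustration of the boundary the isUInt<5> check above draws (a toy program, not part of the patch; the helper fitsInUImm5 is invented here): constant AVL values 0 through 31 can be folded into the immediate form, while anything larger still has to be materialized in a register.

#include <cstdint>
#include <cstdio>

// Same predicate as isUInt<5>() above: true iff the AVL constant fits in an
// unsigned 5-bit field and can therefore be encoded directly in vsetivli.
static bool fitsInUImm5(uint64_t V) { return V < (1u << 5); }

int main() {
  const uint64_t Samples[] = {0, 4, 31, 32, 100};
  for (uint64_t AVL : Samples)
    std::printf("AVL=%3llu -> %s\n", (unsigned long long)AVL,
                fitsInUImm5(AVL) ? "vsetivli (5-bit immediate)"
                                 : "vsetvli (register)");
  return 0;
}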


@ -6136,24 +6136,15 @@ static MachineBasicBlock *addVSetVL(MachineInstr &MI, MachineBasicBlock *BB,
auto BuildVSETVLI = [&]() {
if (VLIndex >= 0) {
Register DestReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
const MachineOperand &VLOp = MI.getOperand(VLIndex);
// VL can be a register or an immediate.
if (VLOp.isImm())
return BuildMI(*BB, MI, DL, TII.get(RISCV::PseudoVSETIVLI))
.addReg(DestReg, RegState::Define | RegState::Dead)
.addImm(VLOp.getImm());
Register VLReg = MI.getOperand(VLIndex).getReg();
// VL might be a compile time constant, but isel would have to put it
// in a register. See if VL comes from an ADDI X0, imm.
if (VLReg.isVirtual()) {
MachineInstr *Def = MRI.getVRegDef(VLReg);
if (Def && Def->getOpcode() == RISCV::ADDI &&
Def->getOperand(1).getReg() == RISCV::X0 &&
Def->getOperand(2).isImm()) {
uint64_t Imm = Def->getOperand(2).getImm();
// VSETIVLI allows a 5-bit zero extended immediate.
if (isUInt<5>(Imm))
return BuildMI(*BB, MI, DL, TII.get(RISCV::PseudoVSETIVLI))
.addReg(DestReg, RegState::Define | RegState::Dead)
.addImm(Imm);
}
}
return BuildMI(*BB, MI, DL, TII.get(RISCV::PseudoVSETVLI))
.addReg(DestReg, RegState::Define | RegState::Dead)
.addReg(VLReg);
@ -6193,7 +6184,7 @@ static MachineBasicBlock *addVSetVL(MachineInstr &MI, MachineBasicBlock *BB,
/*MaskAgnostic*/ false));
// Remove (now) redundant operands from pseudo
if (VLIndex >= 0) {
if (VLIndex >= 0 && MI.getOperand(VLIndex).isReg()) {
MI.getOperand(VLIndex).setReg(RISCV::NoRegister);
MI.getOperand(VLIndex).setIsKill(false);
}


@ -20,6 +20,14 @@ def riscv_vmv_x_s : SDNode<"RISCVISD::VMV_X_S",
def riscv_read_vlenb : SDNode<"RISCVISD::READ_VLENB",
SDTypeProfile<1, 0, [SDTCisVT<0, XLenVT>]>>;
// Operand that is allowed to be a register or a 5 bit immediate.
// This allows us to pick between VSETIVLI and VSETVLI opcodes using the same
// pseudo instructions.
def AVL : RegisterOperand<GPR> {
let OperandNamespace = "RISCVOp";
let OperandType = "OPERAND_AVL";
}
// X0 has special meaning for vsetvl/vsetvli.
// rd | rs1 | AVL value | Effect on vl
//--------------------------------------------------------------
@ -115,7 +123,7 @@ class octuple_to_str<int octuple> {
"NoDef")))))));
}
def VLOpFrag : PatFrag<(ops), (XLenVT (VLOp (XLenVT GPR:$vl)))>;
def VLOpFrag : PatFrag<(ops), (XLenVT (VLOp (XLenVT AVL:$vl)))>;
// Output pattern for X0 used to represent VLMAX in the pseudo instructions.
def VLMax : OutPatFrag<(ops), (XLenVT X0)>;
@ -603,7 +611,7 @@ class VPseudo<Instruction instr, LMULInfo m, dag outs, dag ins> :
class VPseudoUSLoadNoMask<VReg RetClass, bits<7> EEW, bit isFF> :
Pseudo<(outs RetClass:$rd),
(ins GPR:$rs1, GPR:$vl, ixlenimm:$sew),[]>,
(ins GPR:$rs1, AVL:$vl, ixlenimm:$sew),[]>,
RISCVVPseudo,
RISCVVLE</*Masked*/0, /*Strided*/0, /*FF*/isFF, EEW, VLMul> {
let mayLoad = 1;
@ -621,7 +629,7 @@ class VPseudoUSLoadMask<VReg RetClass, bits<7> EEW, bit isFF> :
Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
(ins GetVRegNoV0<RetClass>.R:$merge,
GPR:$rs1,
VMaskOp:$vm, GPR:$vl, ixlenimm:$sew),[]>,
VMaskOp:$vm, AVL:$vl, ixlenimm:$sew),[]>,
RISCVVPseudo,
RISCVVLE</*Masked*/1, /*Strided*/0, /*FF*/isFF, EEW, VLMul> {
let mayLoad = 1;
@ -638,7 +646,7 @@ class VPseudoUSLoadMask<VReg RetClass, bits<7> EEW, bit isFF> :
class VPseudoSLoadNoMask<VReg RetClass, bits<7> EEW>:
Pseudo<(outs RetClass:$rd),
(ins GPR:$rs1, GPR:$rs2, GPR:$vl, ixlenimm:$sew),[]>,
(ins GPR:$rs1, GPR:$rs2, AVL:$vl, ixlenimm:$sew),[]>,
RISCVVPseudo,
RISCVVLE</*Masked*/0, /*Strided*/1, /*FF*/0, EEW, VLMul> {
let mayLoad = 1;
@ -656,7 +664,7 @@ class VPseudoSLoadMask<VReg RetClass, bits<7> EEW>:
Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
(ins GetVRegNoV0<RetClass>.R:$merge,
GPR:$rs1, GPR:$rs2,
VMaskOp:$vm, GPR:$vl, ixlenimm:$sew),[]>,
VMaskOp:$vm, AVL:$vl, ixlenimm:$sew),[]>,
RISCVVPseudo,
RISCVVLE</*Masked*/1, /*Strided*/1, /*FF*/0, EEW, VLMul> {
let mayLoad = 1;
@ -674,7 +682,7 @@ class VPseudoSLoadMask<VReg RetClass, bits<7> EEW>:
class VPseudoILoadNoMask<VReg RetClass, VReg IdxClass, bits<7> EEW, bits<3> LMUL,
bit Ordered, bit EarlyClobber>:
Pseudo<(outs RetClass:$rd),
(ins GPR:$rs1, IdxClass:$rs2, GPR:$vl, ixlenimm:$sew),[]>,
(ins GPR:$rs1, IdxClass:$rs2, AVL:$vl, ixlenimm:$sew),[]>,
RISCVVPseudo,
RISCVVLX</*Masked*/0, Ordered, EEW, VLMul, LMUL> {
let mayLoad = 1;
@ -694,7 +702,7 @@ class VPseudoILoadMask<VReg RetClass, VReg IdxClass, bits<7> EEW, bits<3> LMUL,
Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
(ins GetVRegNoV0<RetClass>.R:$merge,
GPR:$rs1, IdxClass:$rs2,
VMaskOp:$vm, GPR:$vl, ixlenimm:$sew),[]>,
VMaskOp:$vm, AVL:$vl, ixlenimm:$sew),[]>,
RISCVVPseudo,
RISCVVLX</*Masked*/1, Ordered, EEW, VLMul, LMUL> {
let mayLoad = 1;
@ -711,7 +719,7 @@ class VPseudoILoadMask<VReg RetClass, VReg IdxClass, bits<7> EEW, bits<3> LMUL,
class VPseudoUSStoreNoMask<VReg StClass, bits<7> EEW>:
Pseudo<(outs),
(ins StClass:$rd, GPR:$rs1, GPR:$vl, ixlenimm:$sew),[]>,
(ins StClass:$rd, GPR:$rs1, AVL:$vl, ixlenimm:$sew),[]>,
RISCVVPseudo,
RISCVVSE</*Masked*/0, /*Strided*/0, EEW, VLMul> {
let mayLoad = 0;
@ -727,7 +735,7 @@ class VPseudoUSStoreNoMask<VReg StClass, bits<7> EEW>:
class VPseudoUSStoreMask<VReg StClass, bits<7> EEW>:
Pseudo<(outs),
(ins StClass:$rd, GPR:$rs1, VMaskOp:$vm, GPR:$vl, ixlenimm:$sew),[]>,
(ins StClass:$rd, GPR:$rs1, VMaskOp:$vm, AVL:$vl, ixlenimm:$sew),[]>,
RISCVVPseudo,
RISCVVSE</*Masked*/1, /*Strided*/0, EEW, VLMul> {
let mayLoad = 0;
@ -742,7 +750,7 @@ class VPseudoUSStoreMask<VReg StClass, bits<7> EEW>:
class VPseudoSStoreNoMask<VReg StClass, bits<7> EEW>:
Pseudo<(outs),
(ins StClass:$rd, GPR:$rs1, GPR:$rs2, GPR:$vl, ixlenimm:$sew),[]>,
(ins StClass:$rd, GPR:$rs1, GPR:$rs2, AVL:$vl, ixlenimm:$sew),[]>,
RISCVVPseudo,
RISCVVSE</*Masked*/0, /*Strided*/1, EEW, VLMul> {
let mayLoad = 0;
@ -758,7 +766,7 @@ class VPseudoSStoreNoMask<VReg StClass, bits<7> EEW>:
class VPseudoSStoreMask<VReg StClass, bits<7> EEW>:
Pseudo<(outs),
(ins StClass:$rd, GPR:$rs1, GPR:$rs2, VMaskOp:$vm, GPR:$vl, ixlenimm:$sew),[]>,
(ins StClass:$rd, GPR:$rs1, GPR:$rs2, VMaskOp:$vm, AVL:$vl, ixlenimm:$sew),[]>,
RISCVVPseudo,
RISCVVSE</*Masked*/1, /*Strided*/1, EEW, VLMul> {
let mayLoad = 0;
@ -775,7 +783,7 @@ class VPseudoSStoreMask<VReg StClass, bits<7> EEW>:
class VPseudoUnaryNoDummyMask<VReg RetClass,
DAGOperand Op2Class> :
Pseudo<(outs RetClass:$rd),
(ins Op2Class:$rs1, GPR:$vl, ixlenimm:$sew), []>,
(ins Op2Class:$rs1, AVL:$vl, ixlenimm:$sew), []>,
RISCVVPseudo {
let mayLoad = 0;
let mayStore = 0;
@ -789,7 +797,7 @@ class VPseudoUnaryNoDummyMask<VReg RetClass,
class VPseudoNullaryNoMask<VReg RegClass>:
Pseudo<(outs RegClass:$rd),
(ins GPR:$vl, ixlenimm:$sew),
(ins AVL:$vl, ixlenimm:$sew),
[]>, RISCVVPseudo {
let mayLoad = 0;
let mayStore = 0;
@ -804,7 +812,7 @@ class VPseudoNullaryNoMask<VReg RegClass>:
class VPseudoNullaryMask<VReg RegClass>:
Pseudo<(outs GetVRegNoV0<RegClass>.R:$rd),
(ins GetVRegNoV0<RegClass>.R:$merge, VMaskOp:$vm, GPR:$vl,
(ins GetVRegNoV0<RegClass>.R:$merge, VMaskOp:$vm, AVL:$vl,
ixlenimm:$sew), []>, RISCVVPseudo {
let mayLoad = 0;
let mayStore = 0;
@ -821,7 +829,7 @@ class VPseudoNullaryMask<VReg RegClass>:
// Nullary for pseudo instructions. They are expanded in
// RISCVExpandPseudoInsts pass.
class VPseudoNullaryPseudoM<string BaseInst>
: Pseudo<(outs VR:$rd), (ins GPR:$vl, ixlenimm:$sew), []>,
: Pseudo<(outs VR:$rd), (ins AVL:$vl, ixlenimm:$sew), []>,
RISCVVPseudo {
let mayLoad = 0;
let mayStore = 0;
@ -838,7 +846,7 @@ class VPseudoNullaryPseudoM<string BaseInst>
// RetClass could be GPR or VReg.
class VPseudoUnaryNoMask<DAGOperand RetClass, VReg OpClass, string Constraint = ""> :
Pseudo<(outs RetClass:$rd),
(ins OpClass:$rs2, GPR:$vl, ixlenimm:$sew), []>,
(ins OpClass:$rs2, AVL:$vl, ixlenimm:$sew), []>,
RISCVVPseudo {
let mayLoad = 0;
let mayStore = 0;
@ -855,7 +863,7 @@ class VPseudoUnaryNoMask<DAGOperand RetClass, VReg OpClass, string Constraint =
class VPseudoUnaryMask<VReg RetClass, VReg OpClass, string Constraint = ""> :
Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
(ins GetVRegNoV0<RetClass>.R:$merge, OpClass:$rs2,
VMaskOp:$vm, GPR:$vl, ixlenimm:$sew), []>,
VMaskOp:$vm, AVL:$vl, ixlenimm:$sew), []>,
RISCVVPseudo {
let mayLoad = 0;
let mayStore = 0;
@ -872,7 +880,7 @@ class VPseudoUnaryMask<VReg RetClass, VReg OpClass, string Constraint = ""> :
// mask unary operation without maskedoff
class VPseudoMaskUnarySOutMask:
Pseudo<(outs GPR:$rd),
(ins VR:$rs1, VMaskOp:$vm, GPR:$vl, ixlenimm:$sew), []>,
(ins VR:$rs1, VMaskOp:$vm, AVL:$vl, ixlenimm:$sew), []>,
RISCVVPseudo {
let mayLoad = 0;
let mayStore = 0;
@ -890,7 +898,7 @@ class VPseudoUnaryAnyMask<VReg RetClass,
Pseudo<(outs RetClass:$rd),
(ins RetClass:$merge,
Op1Class:$rs2,
VR:$vm, GPR:$vl, ixlenimm:$sew),
VR:$vm, AVL:$vl, ixlenimm:$sew),
[]>,
RISCVVPseudo {
let mayLoad = 0;
@ -910,7 +918,7 @@ class VPseudoBinaryNoMask<VReg RetClass,
DAGOperand Op2Class,
string Constraint> :
Pseudo<(outs RetClass:$rd),
(ins Op1Class:$rs2, Op2Class:$rs1, GPR:$vl, ixlenimm:$sew), []>,
(ins Op1Class:$rs2, Op2Class:$rs1, AVL:$vl, ixlenimm:$sew), []>,
RISCVVPseudo {
let mayLoad = 0;
let mayStore = 0;
@ -927,7 +935,7 @@ class VPseudoBinaryNoMask<VReg RetClass,
class VPseudoIStoreNoMask<VReg StClass, VReg IdxClass, bits<7> EEW, bits<3> LMUL,
bit Ordered>:
Pseudo<(outs),
(ins StClass:$rd, GPR:$rs1, IdxClass:$rs2, GPR:$vl, ixlenimm:$sew),[]>,
(ins StClass:$rd, GPR:$rs1, IdxClass:$rs2, AVL:$vl, ixlenimm:$sew),[]>,
RISCVVPseudo,
RISCVVSX</*Masked*/0, Ordered, EEW, VLMul, LMUL> {
let mayLoad = 0;
@ -944,7 +952,7 @@ class VPseudoIStoreNoMask<VReg StClass, VReg IdxClass, bits<7> EEW, bits<3> LMUL
class VPseudoIStoreMask<VReg StClass, VReg IdxClass, bits<7> EEW, bits<3> LMUL,
bit Ordered>:
Pseudo<(outs),
(ins StClass:$rd, GPR:$rs1, IdxClass:$rs2, VMaskOp:$vm, GPR:$vl, ixlenimm:$sew),[]>,
(ins StClass:$rd, GPR:$rs1, IdxClass:$rs2, VMaskOp:$vm, AVL:$vl, ixlenimm:$sew),[]>,
RISCVVPseudo,
RISCVVSX</*Masked*/1, Ordered, EEW, VLMul, LMUL> {
let mayLoad = 0;
@ -964,7 +972,7 @@ class VPseudoBinaryMask<VReg RetClass,
Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
(ins GetVRegNoV0<RetClass>.R:$merge,
Op1Class:$rs2, Op2Class:$rs1,
VMaskOp:$vm, GPR:$vl, ixlenimm:$sew), []>,
VMaskOp:$vm, AVL:$vl, ixlenimm:$sew), []>,
RISCVVPseudo {
let mayLoad = 0;
let mayStore = 0;
@ -986,7 +994,7 @@ class VPseudoBinaryMOutMask<VReg RetClass,
Pseudo<(outs RetClass:$rd),
(ins RetClass:$merge,
Op1Class:$rs2, Op2Class:$rs1,
VMaskOp:$vm, GPR:$vl, ixlenimm:$sew), []>,
VMaskOp:$vm, AVL:$vl, ixlenimm:$sew), []>,
RISCVVPseudo {
let mayLoad = 0;
let mayStore = 0;
@ -1008,9 +1016,9 @@ class VPseudoBinaryCarryIn<VReg RetClass,
string Constraint> :
Pseudo<(outs RetClass:$rd),
!if(CarryIn,
(ins Op1Class:$rs2, Op2Class:$rs1, VMV0:$carry, GPR:$vl,
(ins Op1Class:$rs2, Op2Class:$rs1, VMV0:$carry, AVL:$vl,
ixlenimm:$sew),
(ins Op1Class:$rs2, Op2Class:$rs1, GPR:$vl, ixlenimm:$sew)), []>,
(ins Op1Class:$rs2, Op2Class:$rs1, AVL:$vl, ixlenimm:$sew)), []>,
RISCVVPseudo {
let mayLoad = 0;
let mayStore = 0;
@ -1031,7 +1039,7 @@ class VPseudoTernaryNoMask<VReg RetClass,
string Constraint> :
Pseudo<(outs RetClass:$rd),
(ins RetClass:$rs3, Op1Class:$rs1, Op2Class:$rs2,
GPR:$vl, ixlenimm:$sew),
AVL:$vl, ixlenimm:$sew),
[]>,
RISCVVPseudo {
let mayLoad = 0;
@ -1053,7 +1061,7 @@ class VPseudoAMOWDNoMask<VReg RetClass,
(ins GPR:$rs1,
Op1Class:$vs2,
GetVRegNoV0<RetClass>.R:$vd,
GPR:$vl, ixlenimm:$sew), []>,
AVL:$vl, ixlenimm:$sew), []>,
RISCVVPseudo {
let mayLoad = 1;
let mayStore = 1;
@ -1073,7 +1081,7 @@ class VPseudoAMOWDMask<VReg RetClass,
(ins GPR:$rs1,
Op1Class:$vs2,
GetVRegNoV0<RetClass>.R:$vd,
VMaskOp:$vm, GPR:$vl, ixlenimm:$sew), []>,
VMaskOp:$vm, AVL:$vl, ixlenimm:$sew), []>,
RISCVVPseudo {
let mayLoad = 1;
let mayStore = 1;
@ -1115,7 +1123,7 @@ multiclass VPseudoAMO {
class VPseudoUSSegLoadNoMask<VReg RetClass, bits<7> EEW, bits<4> NF, bit isFF>:
Pseudo<(outs RetClass:$rd),
(ins GPR:$rs1, GPR:$vl, ixlenimm:$sew),[]>,
(ins GPR:$rs1, AVL:$vl, ixlenimm:$sew),[]>,
RISCVVPseudo,
RISCVVLSEG<NF, /*Masked*/0, /*Strided*/0, /*FF*/isFF, EEW, VLMul> {
let mayLoad = 1;
@ -1132,7 +1140,7 @@ class VPseudoUSSegLoadNoMask<VReg RetClass, bits<7> EEW, bits<4> NF, bit isFF>:
class VPseudoUSSegLoadMask<VReg RetClass, bits<7> EEW, bits<4> NF, bit isFF>:
Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
(ins GetVRegNoV0<RetClass>.R:$merge, GPR:$rs1,
VMaskOp:$vm, GPR:$vl, ixlenimm:$sew),[]>,
VMaskOp:$vm, AVL:$vl, ixlenimm:$sew),[]>,
RISCVVPseudo,
RISCVVLSEG<NF, /*Masked*/1, /*Strided*/0, /*FF*/isFF, EEW, VLMul> {
let mayLoad = 1;
@ -1149,7 +1157,7 @@ class VPseudoUSSegLoadMask<VReg RetClass, bits<7> EEW, bits<4> NF, bit isFF>:
class VPseudoSSegLoadNoMask<VReg RetClass, bits<7> EEW, bits<4> NF>:
Pseudo<(outs RetClass:$rd),
(ins GPR:$rs1, GPR:$offset, GPR:$vl, ixlenimm:$sew),[]>,
(ins GPR:$rs1, GPR:$offset, AVL:$vl, ixlenimm:$sew),[]>,
RISCVVPseudo,
RISCVVLSEG<NF, /*Masked*/0, /*Strided*/1, /*FF*/0, EEW, VLMul> {
let mayLoad = 1;
@ -1167,7 +1175,7 @@ class VPseudoSSegLoadNoMask<VReg RetClass, bits<7> EEW, bits<4> NF>:
class VPseudoSSegLoadMask<VReg RetClass, bits<7> EEW, bits<4> NF>:
Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
(ins GetVRegNoV0<RetClass>.R:$merge, GPR:$rs1,
GPR:$offset, VMaskOp:$vm, GPR:$vl, ixlenimm:$sew),[]>,
GPR:$offset, VMaskOp:$vm, AVL:$vl, ixlenimm:$sew),[]>,
RISCVVPseudo,
RISCVVLSEG<NF, /*Masked*/1, /*Strided*/1, /*FF*/0, EEW, VLMul> {
let mayLoad = 1;
@ -1185,7 +1193,7 @@ class VPseudoSSegLoadMask<VReg RetClass, bits<7> EEW, bits<4> NF>:
class VPseudoISegLoadNoMask<VReg RetClass, VReg IdxClass, bits<7> EEW, bits<3> LMUL,
bits<4> NF, bit Ordered>:
Pseudo<(outs RetClass:$rd),
(ins GPR:$rs1, IdxClass:$offset, GPR:$vl, ixlenimm:$sew),[]>,
(ins GPR:$rs1, IdxClass:$offset, AVL:$vl, ixlenimm:$sew),[]>,
RISCVVPseudo,
RISCVVLXSEG<NF, /*Masked*/0, Ordered, EEW, VLMul, LMUL> {
let mayLoad = 1;
@ -1206,7 +1214,7 @@ class VPseudoISegLoadMask<VReg RetClass, VReg IdxClass, bits<7> EEW, bits<3> LMU
bits<4> NF, bit Ordered>:
Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
(ins GetVRegNoV0<RetClass>.R:$merge, GPR:$rs1,
IdxClass:$offset, VMaskOp:$vm, GPR:$vl, ixlenimm:$sew),[]>,
IdxClass:$offset, VMaskOp:$vm, AVL:$vl, ixlenimm:$sew),[]>,
RISCVVPseudo,
RISCVVLXSEG<NF, /*Masked*/1, Ordered, EEW, VLMul, LMUL> {
let mayLoad = 1;
@ -1225,7 +1233,7 @@ class VPseudoISegLoadMask<VReg RetClass, VReg IdxClass, bits<7> EEW, bits<3> LMU
class VPseudoUSSegStoreNoMask<VReg ValClass, bits<7> EEW, bits<4> NF>:
Pseudo<(outs),
(ins ValClass:$rd, GPR:$rs1, GPR:$vl, ixlenimm:$sew),[]>,
(ins ValClass:$rd, GPR:$rs1, AVL:$vl, ixlenimm:$sew),[]>,
RISCVVPseudo,
RISCVVSSEG<NF, /*Masked*/0, /*Strided*/0, EEW, VLMul> {
let mayLoad = 0;
@ -1242,7 +1250,7 @@ class VPseudoUSSegStoreNoMask<VReg ValClass, bits<7> EEW, bits<4> NF>:
class VPseudoUSSegStoreMask<VReg ValClass, bits<7> EEW, bits<4> NF>:
Pseudo<(outs),
(ins ValClass:$rd, GPR:$rs1,
VMaskOp:$vm, GPR:$vl, ixlenimm:$sew),[]>,
VMaskOp:$vm, AVL:$vl, ixlenimm:$sew),[]>,
RISCVVPseudo,
RISCVVSSEG<NF, /*Masked*/1, /*Strided*/0, EEW, VLMul> {
let mayLoad = 0;
@ -1257,7 +1265,7 @@ class VPseudoUSSegStoreMask<VReg ValClass, bits<7> EEW, bits<4> NF>:
class VPseudoSSegStoreNoMask<VReg ValClass, bits<7> EEW, bits<4> NF>:
Pseudo<(outs),
(ins ValClass:$rd, GPR:$rs1, GPR: $offset, GPR:$vl, ixlenimm:$sew),[]>,
(ins ValClass:$rd, GPR:$rs1, GPR: $offset, AVL:$vl, ixlenimm:$sew),[]>,
RISCVVPseudo,
RISCVVSSEG<NF, /*Masked*/0, /*Strided*/1, EEW, VLMul> {
let mayLoad = 0;
@ -1274,7 +1282,7 @@ class VPseudoSSegStoreNoMask<VReg ValClass, bits<7> EEW, bits<4> NF>:
class VPseudoSSegStoreMask<VReg ValClass, bits<7> EEW, bits<4> NF>:
Pseudo<(outs),
(ins ValClass:$rd, GPR:$rs1, GPR: $offset,
VMaskOp:$vm, GPR:$vl, ixlenimm:$sew),[]>,
VMaskOp:$vm, AVL:$vl, ixlenimm:$sew),[]>,
RISCVVPseudo,
RISCVVSSEG<NF, /*Masked*/1, /*Strided*/1, EEW, VLMul> {
let mayLoad = 0;
@ -1291,7 +1299,7 @@ class VPseudoISegStoreNoMask<VReg ValClass, VReg IdxClass, bits<7> EEW, bits<3>
bits<4> NF, bit Ordered>:
Pseudo<(outs),
(ins ValClass:$rd, GPR:$rs1, IdxClass: $index,
GPR:$vl, ixlenimm:$sew),[]>,
AVL:$vl, ixlenimm:$sew),[]>,
RISCVVPseudo,
RISCVVSXSEG<NF, /*Masked*/0, Ordered, EEW, VLMul, LMUL> {
let mayLoad = 0;
@ -1309,7 +1317,7 @@ class VPseudoISegStoreMask<VReg ValClass, VReg IdxClass, bits<7> EEW, bits<3> LM
bits<4> NF, bit Ordered>:
Pseudo<(outs),
(ins ValClass:$rd, GPR:$rs1, IdxClass: $index,
VMaskOp:$vm, GPR:$vl, ixlenimm:$sew),[]>,
VMaskOp:$vm, AVL:$vl, ixlenimm:$sew),[]>,
RISCVVPseudo,
RISCVVSXSEG<NF, /*Masked*/1, Ordered, EEW, VLMul, LMUL> {
let mayLoad = 0;
@ -3794,7 +3802,7 @@ let mayLoad = 0, mayStore = 0, hasSideEffects = 0, usesCustomInserter = 1,
ForceTailAgnostic = true, Constraints = "$rd = $rs1" in
def PseudoVMV_S_X # "_" # m.MX: Pseudo<(outs m.vrclass:$rd),
(ins m.vrclass:$rs1, GPR:$rs2,
GPR:$vl, ixlenimm:$sew),
AVL:$vl, ixlenimm:$sew),
[]>, RISCVVPseudo;
}
}
@ -3822,7 +3830,7 @@ let mayLoad = 0, mayStore = 0, hasSideEffects = 0, usesCustomInserter = 1,
def "PseudoVFMV_S_" # f.FX # "_" # m.MX :
Pseudo<(outs m.vrclass:$rd),
(ins m.vrclass:$rs1, f.fprclass:$rs2,
GPR:$vl, ixlenimm:$sew),
AVL:$vl, ixlenimm:$sew),
[]>, RISCVVPseudo;
}
}


@ -377,24 +377,24 @@ define void @bitreverse_v2i64(<2 x i64>* %x, <2 x i64>* %y) {
; LMULMAX2-RV32-NEXT: addi a3, a3, -256
; LMULMAX2-RV32-NEXT: vand.vx v27, v27, a3
; LMULMAX2-RV32-NEXT: vor.vv v26, v27, v26
; LMULMAX2-RV32-NEXT: addi a4, zero, 5
; LMULMAX2-RV32-NEXT: vsetivli a5, 1, e8,m1,ta,mu
; LMULMAX2-RV32-NEXT: vmv.s.x v0, a4
; LMULMAX2-RV32-NEXT: vsetivli a4, 4, e32,m1,ta,mu
; LMULMAX2-RV32-NEXT: vmv.v.i v27, 0
; LMULMAX2-RV32-NEXT: lui a4, 1044480
; LMULMAX2-RV32-NEXT: vmerge.vxm v27, v27, a4, v0
; LMULMAX2-RV32-NEXT: vsetivli a4, 2, e64,m1,ta,mu
; LMULMAX2-RV32-NEXT: vsrl.vi v28, v25, 8
; LMULMAX2-RV32-NEXT: vand.vv v27, v28, v27
; LMULMAX2-RV32-NEXT: vsrl.vi v28, v25, 24
; LMULMAX2-RV32-NEXT: vsrl.vi v27, v25, 24
; LMULMAX2-RV32-NEXT: lui a4, 4080
; LMULMAX2-RV32-NEXT: vand.vx v28, v28, a4
; LMULMAX2-RV32-NEXT: vor.vv v27, v27, v28
; LMULMAX2-RV32-NEXT: vor.vv v26, v27, v26
; LMULMAX2-RV32-NEXT: addi a5, zero, 255
; LMULMAX2-RV32-NEXT: vand.vx v27, v27, a4
; LMULMAX2-RV32-NEXT: addi a5, zero, 5
; LMULMAX2-RV32-NEXT: vsetivli a1, 1, e8,m1,ta,mu
; LMULMAX2-RV32-NEXT: vmv.s.x v0, a5
; LMULMAX2-RV32-NEXT: vsetivli a1, 4, e32,m1,ta,mu
; LMULMAX2-RV32-NEXT: vmv.v.x v27, a5
; LMULMAX2-RV32-NEXT: vmv.v.i v28, 0
; LMULMAX2-RV32-NEXT: lui a1, 1044480
; LMULMAX2-RV32-NEXT: vmerge.vxm v28, v28, a1, v0
; LMULMAX2-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
; LMULMAX2-RV32-NEXT: vsrl.vi v29, v25, 8
; LMULMAX2-RV32-NEXT: vand.vv v28, v29, v28
; LMULMAX2-RV32-NEXT: vor.vv v27, v28, v27
; LMULMAX2-RV32-NEXT: vor.vv v26, v27, v26
; LMULMAX2-RV32-NEXT: addi a1, zero, 255
; LMULMAX2-RV32-NEXT: vsetivli a5, 4, e32,m1,ta,mu
; LMULMAX2-RV32-NEXT: vmv.v.x v27, a1
; LMULMAX2-RV32-NEXT: vmerge.vim v27, v27, 0, v0
; LMULMAX2-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
; LMULMAX2-RV32-NEXT: vsll.vi v28, v25, 8
@ -406,12 +406,12 @@ define void @bitreverse_v2i64(<2 x i64>* %x, <2 x i64>* %y) {
; LMULMAX2-RV32-NEXT: vsll.vi v29, v25, 24
; LMULMAX2-RV32-NEXT: vand.vv v28, v29, v28
; LMULMAX2-RV32-NEXT: vor.vv v27, v28, v27
; LMULMAX2-RV32-NEXT: vsll.vx v28, v25, a2
; LMULMAX2-RV32-NEXT: vsetivli a1, 4, e32,m1,ta,mu
; LMULMAX2-RV32-NEXT: vmv.v.x v28, a4
; LMULMAX2-RV32-NEXT: vmerge.vim v28, v28, 0, v0
; LMULMAX2-RV32-NEXT: vmv.v.x v29, a4
; LMULMAX2-RV32-NEXT: vmerge.vim v29, v29, 0, v0
; LMULMAX2-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
; LMULMAX2-RV32-NEXT: vsll.vx v29, v25, a2
; LMULMAX2-RV32-NEXT: vand.vv v28, v29, v28
; LMULMAX2-RV32-NEXT: vand.vv v28, v28, v29
; LMULMAX2-RV32-NEXT: vsll.vx v25, v25, a6
; LMULMAX2-RV32-NEXT: vor.vv v25, v25, v28
; LMULMAX2-RV32-NEXT: vor.vv v25, v25, v27
@ -577,24 +577,24 @@ define void @bitreverse_v2i64(<2 x i64>* %x, <2 x i64>* %y) {
; LMULMAX1-RV32-NEXT: addi a3, a3, -256
; LMULMAX1-RV32-NEXT: vand.vx v27, v27, a3
; LMULMAX1-RV32-NEXT: vor.vv v26, v27, v26
; LMULMAX1-RV32-NEXT: addi a4, zero, 5
; LMULMAX1-RV32-NEXT: vsetivli a5, 1, e8,m1,ta,mu
; LMULMAX1-RV32-NEXT: vmv.s.x v0, a4
; LMULMAX1-RV32-NEXT: vsetivli a4, 4, e32,m1,ta,mu
; LMULMAX1-RV32-NEXT: vmv.v.i v27, 0
; LMULMAX1-RV32-NEXT: lui a4, 1044480
; LMULMAX1-RV32-NEXT: vmerge.vxm v27, v27, a4, v0
; LMULMAX1-RV32-NEXT: vsetivli a4, 2, e64,m1,ta,mu
; LMULMAX1-RV32-NEXT: vsrl.vi v28, v25, 8
; LMULMAX1-RV32-NEXT: vand.vv v27, v28, v27
; LMULMAX1-RV32-NEXT: vsrl.vi v28, v25, 24
; LMULMAX1-RV32-NEXT: vsrl.vi v27, v25, 24
; LMULMAX1-RV32-NEXT: lui a4, 4080
; LMULMAX1-RV32-NEXT: vand.vx v28, v28, a4
; LMULMAX1-RV32-NEXT: vor.vv v27, v27, v28
; LMULMAX1-RV32-NEXT: vor.vv v26, v27, v26
; LMULMAX1-RV32-NEXT: addi a5, zero, 255
; LMULMAX1-RV32-NEXT: vand.vx v27, v27, a4
; LMULMAX1-RV32-NEXT: addi a5, zero, 5
; LMULMAX1-RV32-NEXT: vsetivli a1, 1, e8,m1,ta,mu
; LMULMAX1-RV32-NEXT: vmv.s.x v0, a5
; LMULMAX1-RV32-NEXT: vsetivli a1, 4, e32,m1,ta,mu
; LMULMAX1-RV32-NEXT: vmv.v.x v27, a5
; LMULMAX1-RV32-NEXT: vmv.v.i v28, 0
; LMULMAX1-RV32-NEXT: lui a1, 1044480
; LMULMAX1-RV32-NEXT: vmerge.vxm v28, v28, a1, v0
; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
; LMULMAX1-RV32-NEXT: vsrl.vi v29, v25, 8
; LMULMAX1-RV32-NEXT: vand.vv v28, v29, v28
; LMULMAX1-RV32-NEXT: vor.vv v27, v28, v27
; LMULMAX1-RV32-NEXT: vor.vv v26, v27, v26
; LMULMAX1-RV32-NEXT: addi a1, zero, 255
; LMULMAX1-RV32-NEXT: vsetivli a5, 4, e32,m1,ta,mu
; LMULMAX1-RV32-NEXT: vmv.v.x v27, a1
; LMULMAX1-RV32-NEXT: vmerge.vim v27, v27, 0, v0
; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
; LMULMAX1-RV32-NEXT: vsll.vi v28, v25, 8
@ -606,12 +606,12 @@ define void @bitreverse_v2i64(<2 x i64>* %x, <2 x i64>* %y) {
; LMULMAX1-RV32-NEXT: vsll.vi v29, v25, 24
; LMULMAX1-RV32-NEXT: vand.vv v28, v29, v28
; LMULMAX1-RV32-NEXT: vor.vv v27, v28, v27
; LMULMAX1-RV32-NEXT: vsll.vx v28, v25, a2
; LMULMAX1-RV32-NEXT: vsetivli a1, 4, e32,m1,ta,mu
; LMULMAX1-RV32-NEXT: vmv.v.x v28, a4
; LMULMAX1-RV32-NEXT: vmerge.vim v28, v28, 0, v0
; LMULMAX1-RV32-NEXT: vmv.v.x v29, a4
; LMULMAX1-RV32-NEXT: vmerge.vim v29, v29, 0, v0
; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
; LMULMAX1-RV32-NEXT: vsll.vx v29, v25, a2
; LMULMAX1-RV32-NEXT: vand.vv v28, v29, v28
; LMULMAX1-RV32-NEXT: vand.vv v28, v28, v29
; LMULMAX1-RV32-NEXT: vsll.vx v25, v25, a6
; LMULMAX1-RV32-NEXT: vor.vv v25, v25, v28
; LMULMAX1-RV32-NEXT: vor.vv v25, v25, v27
@ -1241,24 +1241,24 @@ define void @bitreverse_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
; LMULMAX2-RV32-NEXT: addi a3, a3, -256
; LMULMAX2-RV32-NEXT: vand.vx v30, v30, a3
; LMULMAX2-RV32-NEXT: vor.vv v28, v30, v28
; LMULMAX2-RV32-NEXT: addi a4, zero, 85
; LMULMAX2-RV32-NEXT: vsetivli a5, 1, e8,m1,ta,mu
; LMULMAX2-RV32-NEXT: vmv.s.x v0, a4
; LMULMAX2-RV32-NEXT: vsetivli a4, 8, e32,m2,ta,mu
; LMULMAX2-RV32-NEXT: vmv.v.i v30, 0
; LMULMAX2-RV32-NEXT: lui a4, 1044480
; LMULMAX2-RV32-NEXT: vmerge.vxm v30, v30, a4, v0
; LMULMAX2-RV32-NEXT: vsetivli a4, 4, e64,m2,ta,mu
; LMULMAX2-RV32-NEXT: vsrl.vi v8, v26, 8
; LMULMAX2-RV32-NEXT: vand.vv v30, v8, v30
; LMULMAX2-RV32-NEXT: vsrl.vi v8, v26, 24
; LMULMAX2-RV32-NEXT: vsrl.vi v30, v26, 24
; LMULMAX2-RV32-NEXT: lui a4, 4080
; LMULMAX2-RV32-NEXT: vand.vx v8, v8, a4
; LMULMAX2-RV32-NEXT: vor.vv v30, v30, v8
; LMULMAX2-RV32-NEXT: vor.vv v28, v30, v28
; LMULMAX2-RV32-NEXT: addi a5, zero, 255
; LMULMAX2-RV32-NEXT: vand.vx v30, v30, a4
; LMULMAX2-RV32-NEXT: addi a5, zero, 85
; LMULMAX2-RV32-NEXT: vsetivli a1, 1, e8,m1,ta,mu
; LMULMAX2-RV32-NEXT: vmv.s.x v0, a5
; LMULMAX2-RV32-NEXT: vsetivli a1, 8, e32,m2,ta,mu
; LMULMAX2-RV32-NEXT: vmv.v.x v30, a5
; LMULMAX2-RV32-NEXT: vmv.v.i v8, 0
; LMULMAX2-RV32-NEXT: lui a1, 1044480
; LMULMAX2-RV32-NEXT: vmerge.vxm v8, v8, a1, v0
; LMULMAX2-RV32-NEXT: vsetivli a1, 4, e64,m2,ta,mu
; LMULMAX2-RV32-NEXT: vsrl.vi v10, v26, 8
; LMULMAX2-RV32-NEXT: vand.vv v8, v10, v8
; LMULMAX2-RV32-NEXT: vor.vv v30, v8, v30
; LMULMAX2-RV32-NEXT: vor.vv v28, v30, v28
; LMULMAX2-RV32-NEXT: addi a1, zero, 255
; LMULMAX2-RV32-NEXT: vsetivli a5, 8, e32,m2,ta,mu
; LMULMAX2-RV32-NEXT: vmv.v.x v30, a1
; LMULMAX2-RV32-NEXT: vmerge.vim v30, v30, 0, v0
; LMULMAX2-RV32-NEXT: vsetivli a1, 4, e64,m2,ta,mu
; LMULMAX2-RV32-NEXT: vsll.vi v8, v26, 8
@ -1270,12 +1270,12 @@ define void @bitreverse_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
; LMULMAX2-RV32-NEXT: vsll.vi v10, v26, 24
; LMULMAX2-RV32-NEXT: vand.vv v8, v10, v8
; LMULMAX2-RV32-NEXT: vor.vv v30, v8, v30
; LMULMAX2-RV32-NEXT: vsll.vx v8, v26, a2
; LMULMAX2-RV32-NEXT: vsetivli a1, 8, e32,m2,ta,mu
; LMULMAX2-RV32-NEXT: vmv.v.x v8, a4
; LMULMAX2-RV32-NEXT: vmerge.vim v8, v8, 0, v0
; LMULMAX2-RV32-NEXT: vmv.v.x v10, a4
; LMULMAX2-RV32-NEXT: vmerge.vim v10, v10, 0, v0
; LMULMAX2-RV32-NEXT: vsetivli a1, 4, e64,m2,ta,mu
; LMULMAX2-RV32-NEXT: vsll.vx v10, v26, a2
; LMULMAX2-RV32-NEXT: vand.vv v8, v10, v8
; LMULMAX2-RV32-NEXT: vand.vv v8, v8, v10
; LMULMAX2-RV32-NEXT: vsll.vx v26, v26, a6
; LMULMAX2-RV32-NEXT: vor.vv v26, v26, v8
; LMULMAX2-RV32-NEXT: vor.vv v26, v26, v30
@ -1435,14 +1435,17 @@ define void @bitreverse_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
; LMULMAX1-RV32-NEXT: addi a1, a0, 16
; LMULMAX1-RV32-NEXT: vle64.v v30, (a1)
; LMULMAX1-RV32-NEXT: vle64.v v25, (a0)
; LMULMAX1-RV32-NEXT: addi a6, zero, 56
; LMULMAX1-RV32-NEXT: vsrl.vx v26, v30, a6
; LMULMAX1-RV32-NEXT: addi a7, zero, 40
; LMULMAX1-RV32-NEXT: vsrl.vx v27, v30, a7
; LMULMAX1-RV32-NEXT: addi a7, zero, 56
; LMULMAX1-RV32-NEXT: vsrl.vx v26, v30, a7
; LMULMAX1-RV32-NEXT: addi a3, zero, 40
; LMULMAX1-RV32-NEXT: vsrl.vx v27, v30, a3
; LMULMAX1-RV32-NEXT: lui a4, 16
; LMULMAX1-RV32-NEXT: addi a4, a4, -256
; LMULMAX1-RV32-NEXT: vand.vx v27, v27, a4
; LMULMAX1-RV32-NEXT: vor.vv v27, v27, v26
; LMULMAX1-RV32-NEXT: vsrl.vi v26, v30, 24
; LMULMAX1-RV32-NEXT: lui a6, 4080
; LMULMAX1-RV32-NEXT: vand.vx v28, v26, a6
; LMULMAX1-RV32-NEXT: addi a5, zero, 5
; LMULMAX1-RV32-NEXT: vsetivli a2, 1, e8,m1,ta,mu
; LMULMAX1-RV32-NEXT: vmv.s.x v0, a5
@ -1451,15 +1454,12 @@ define void @bitreverse_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
; LMULMAX1-RV32-NEXT: lui a2, 1044480
; LMULMAX1-RV32-NEXT: vmerge.vxm v26, v26, a2, v0
; LMULMAX1-RV32-NEXT: vsetivli a2, 2, e64,m1,ta,mu
; LMULMAX1-RV32-NEXT: vsrl.vi v28, v30, 8
; LMULMAX1-RV32-NEXT: vand.vv v28, v28, v26
; LMULMAX1-RV32-NEXT: vsrl.vi v29, v30, 24
; LMULMAX1-RV32-NEXT: lui a5, 4080
; LMULMAX1-RV32-NEXT: vand.vx v29, v29, a5
; LMULMAX1-RV32-NEXT: vor.vv v28, v28, v29
; LMULMAX1-RV32-NEXT: vsrl.vi v29, v30, 8
; LMULMAX1-RV32-NEXT: vand.vv v29, v29, v26
; LMULMAX1-RV32-NEXT: vor.vv v28, v29, v28
; LMULMAX1-RV32-NEXT: vor.vv v31, v28, v27
; LMULMAX1-RV32-NEXT: addi a2, zero, 255
; LMULMAX1-RV32-NEXT: vsetivli a3, 4, e32,m1,ta,mu
; LMULMAX1-RV32-NEXT: vsetivli a5, 4, e32,m1,ta,mu
; LMULMAX1-RV32-NEXT: vmv.v.x v27, a2
; LMULMAX1-RV32-NEXT: vmerge.vim v27, v27, 0, v0
; LMULMAX1-RV32-NEXT: vsetivli a2, 2, e64,m1,ta,mu
@ -1472,26 +1472,26 @@ define void @bitreverse_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
; LMULMAX1-RV32-NEXT: vsll.vi v8, v30, 24
; LMULMAX1-RV32-NEXT: vand.vv v8, v8, v28
; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v29
; LMULMAX1-RV32-NEXT: vsll.vx v9, v30, a3
; LMULMAX1-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu
; LMULMAX1-RV32-NEXT: vmv.v.x v29, a5
; LMULMAX1-RV32-NEXT: vmv.v.x v29, a6
; LMULMAX1-RV32-NEXT: vmerge.vim v29, v29, 0, v0
; LMULMAX1-RV32-NEXT: vsetivli a2, 2, e64,m1,ta,mu
; LMULMAX1-RV32-NEXT: vsll.vx v9, v30, a7
; LMULMAX1-RV32-NEXT: vand.vv v9, v9, v29
; LMULMAX1-RV32-NEXT: vsll.vx v30, v30, a6
; LMULMAX1-RV32-NEXT: vsll.vx v30, v30, a7
; LMULMAX1-RV32-NEXT: vor.vv v30, v30, v9
; LMULMAX1-RV32-NEXT: vor.vv v30, v30, v8
; LMULMAX1-RV32-NEXT: vor.vv v31, v30, v31
; LMULMAX1-RV32-NEXT: lui a2, 61681
; LMULMAX1-RV32-NEXT: addi a2, a2, -241
; LMULMAX1-RV32-NEXT: vsetivli a3, 4, e32,m1,ta,mu
; LMULMAX1-RV32-NEXT: vsetivli a5, 4, e32,m1,ta,mu
; LMULMAX1-RV32-NEXT: vmv.v.x v30, a2
; LMULMAX1-RV32-NEXT: vsetivli a2, 2, e64,m1,ta,mu
; LMULMAX1-RV32-NEXT: vand.vv v8, v31, v30
; LMULMAX1-RV32-NEXT: vsll.vi v8, v8, 4
; LMULMAX1-RV32-NEXT: lui a2, 986895
; LMULMAX1-RV32-NEXT: addi a2, a2, 240
; LMULMAX1-RV32-NEXT: vsetivli a3, 4, e32,m1,ta,mu
; LMULMAX1-RV32-NEXT: vsetivli a5, 4, e32,m1,ta,mu
; LMULMAX1-RV32-NEXT: vmv.v.x v9, a2
; LMULMAX1-RV32-NEXT: vsetivli a2, 2, e64,m1,ta,mu
; LMULMAX1-RV32-NEXT: vand.vv v31, v31, v9
@ -1499,14 +1499,14 @@ define void @bitreverse_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
; LMULMAX1-RV32-NEXT: vor.vv v31, v31, v8
; LMULMAX1-RV32-NEXT: lui a2, 209715
; LMULMAX1-RV32-NEXT: addi a2, a2, 819
; LMULMAX1-RV32-NEXT: vsetivli a3, 4, e32,m1,ta,mu
; LMULMAX1-RV32-NEXT: vsetivli a5, 4, e32,m1,ta,mu
; LMULMAX1-RV32-NEXT: vmv.v.x v8, a2
; LMULMAX1-RV32-NEXT: vsetivli a2, 2, e64,m1,ta,mu
; LMULMAX1-RV32-NEXT: vand.vv v10, v31, v8
; LMULMAX1-RV32-NEXT: vsll.vi v10, v10, 2
; LMULMAX1-RV32-NEXT: lui a2, 838861
; LMULMAX1-RV32-NEXT: addi a2, a2, -820
; LMULMAX1-RV32-NEXT: vsetivli a3, 4, e32,m1,ta,mu
; LMULMAX1-RV32-NEXT: vsetivli a5, 4, e32,m1,ta,mu
; LMULMAX1-RV32-NEXT: vmv.v.x v11, a2
; LMULMAX1-RV32-NEXT: vsetivli a2, 2, e64,m1,ta,mu
; LMULMAX1-RV32-NEXT: vand.vv v31, v31, v11
@ -1514,27 +1514,27 @@ define void @bitreverse_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
; LMULMAX1-RV32-NEXT: vor.vv v31, v31, v10
; LMULMAX1-RV32-NEXT: lui a2, 349525
; LMULMAX1-RV32-NEXT: addi a2, a2, 1365
; LMULMAX1-RV32-NEXT: vsetivli a3, 4, e32,m1,ta,mu
; LMULMAX1-RV32-NEXT: vsetivli a5, 4, e32,m1,ta,mu
; LMULMAX1-RV32-NEXT: vmv.v.x v10, a2
; LMULMAX1-RV32-NEXT: vsetivli a2, 2, e64,m1,ta,mu
; LMULMAX1-RV32-NEXT: vand.vv v12, v31, v10
; LMULMAX1-RV32-NEXT: vsll.vi v12, v12, 1
; LMULMAX1-RV32-NEXT: lui a2, 699051
; LMULMAX1-RV32-NEXT: addi a2, a2, -1366
; LMULMAX1-RV32-NEXT: vsetivli a3, 4, e32,m1,ta,mu
; LMULMAX1-RV32-NEXT: vsetivli a5, 4, e32,m1,ta,mu
; LMULMAX1-RV32-NEXT: vmv.v.x v13, a2
; LMULMAX1-RV32-NEXT: vsetivli a2, 2, e64,m1,ta,mu
; LMULMAX1-RV32-NEXT: vand.vv v31, v31, v13
; LMULMAX1-RV32-NEXT: vsrl.vi v31, v31, 1
; LMULMAX1-RV32-NEXT: vor.vv v31, v31, v12
; LMULMAX1-RV32-NEXT: vsrl.vx v12, v25, a6
; LMULMAX1-RV32-NEXT: vsrl.vx v14, v25, a7
; LMULMAX1-RV32-NEXT: vsrl.vx v12, v25, a7
; LMULMAX1-RV32-NEXT: vsrl.vx v14, v25, a3
; LMULMAX1-RV32-NEXT: vand.vx v14, v14, a4
; LMULMAX1-RV32-NEXT: vor.vv v12, v14, v12
; LMULMAX1-RV32-NEXT: vsrl.vi v14, v25, 8
; LMULMAX1-RV32-NEXT: vand.vv v26, v14, v26
; LMULMAX1-RV32-NEXT: vsrl.vi v14, v25, 24
; LMULMAX1-RV32-NEXT: vand.vx v14, v14, a5
; LMULMAX1-RV32-NEXT: vand.vx v14, v14, a6
; LMULMAX1-RV32-NEXT: vsrl.vi v15, v25, 8
; LMULMAX1-RV32-NEXT: vand.vv v26, v15, v26
; LMULMAX1-RV32-NEXT: vor.vv v26, v26, v14
; LMULMAX1-RV32-NEXT: vor.vv v26, v26, v12
; LMULMAX1-RV32-NEXT: vsll.vi v12, v25, 8
@ -1542,9 +1542,9 @@ define void @bitreverse_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
; LMULMAX1-RV32-NEXT: vsll.vi v12, v25, 24
; LMULMAX1-RV32-NEXT: vand.vv v28, v12, v28
; LMULMAX1-RV32-NEXT: vor.vv v27, v28, v27
; LMULMAX1-RV32-NEXT: vsll.vx v28, v25, a7
; LMULMAX1-RV32-NEXT: vsll.vx v28, v25, a3
; LMULMAX1-RV32-NEXT: vand.vv v28, v28, v29
; LMULMAX1-RV32-NEXT: vsll.vx v25, v25, a6
; LMULMAX1-RV32-NEXT: vsll.vx v25, v25, a7
; LMULMAX1-RV32-NEXT: vor.vv v25, v25, v28
; LMULMAX1-RV32-NEXT: vor.vv v25, v25, v27
; LMULMAX1-RV32-NEXT: vor.vv v25, v25, v26


@ -614,64 +614,64 @@ define void @bswap_v2i64(<2 x i64>* %x, <2 x i64>* %y) {
; LMULMAX2-RV64: # %bb.0:
; LMULMAX2-RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu
; LMULMAX2-RV64-NEXT: vle64.v v25, (a0)
; LMULMAX2-RV64-NEXT: vmv.x.s a1, v25
; LMULMAX2-RV64-NEXT: vsetivli a1, 1, e64,m1,ta,mu
; LMULMAX2-RV64-NEXT: vslidedown.vi v26, v25, 1
; LMULMAX2-RV64-NEXT: vmv.x.s a1, v26
; LMULMAX2-RV64-NEXT: srli a2, a1, 40
; LMULMAX2-RV64-NEXT: lui a3, 16
; LMULMAX2-RV64-NEXT: addiw a7, a3, -256
; LMULMAX2-RV64-NEXT: and a2, a2, a7
; LMULMAX2-RV64-NEXT: srli a4, a1, 56
; LMULMAX2-RV64-NEXT: or a2, a2, a4
; LMULMAX2-RV64-NEXT: or t0, a2, a4
; LMULMAX2-RV64-NEXT: srli a4, a1, 24
; LMULMAX2-RV64-NEXT: lui a6, 4080
; LMULMAX2-RV64-NEXT: and a4, a4, a6
; LMULMAX2-RV64-NEXT: srli a5, a1, 8
; LMULMAX2-RV64-NEXT: addi a3, zero, 255
; LMULMAX2-RV64-NEXT: slli t0, a3, 24
; LMULMAX2-RV64-NEXT: and a5, a5, t0
; LMULMAX2-RV64-NEXT: slli a2, a3, 24
; LMULMAX2-RV64-NEXT: and a5, a5, a2
; LMULMAX2-RV64-NEXT: or a4, a5, a4
; LMULMAX2-RV64-NEXT: or t1, a4, a2
; LMULMAX2-RV64-NEXT: slli a4, a1, 8
; LMULMAX2-RV64-NEXT: slli t2, a3, 32
; LMULMAX2-RV64-NEXT: or t0, a4, t0
; LMULMAX2-RV64-NEXT: slli a5, a1, 8
; LMULMAX2-RV64-NEXT: slli t1, a3, 32
; LMULMAX2-RV64-NEXT: and a5, a5, t1
; LMULMAX2-RV64-NEXT: slli a4, a1, 24
; LMULMAX2-RV64-NEXT: slli t2, a3, 40
; LMULMAX2-RV64-NEXT: and a4, a4, t2
; LMULMAX2-RV64-NEXT: slli a2, a1, 24
; LMULMAX2-RV64-NEXT: slli t3, a3, 40
; LMULMAX2-RV64-NEXT: and a2, a2, t3
; LMULMAX2-RV64-NEXT: or a2, a2, a4
; LMULMAX2-RV64-NEXT: slli a4, a1, 40
; LMULMAX2-RV64-NEXT: or a4, a4, a5
; LMULMAX2-RV64-NEXT: slli a5, a1, 40
; LMULMAX2-RV64-NEXT: slli a3, a3, 48
; LMULMAX2-RV64-NEXT: and a4, a4, a3
; LMULMAX2-RV64-NEXT: and a5, a5, a3
; LMULMAX2-RV64-NEXT: slli a1, a1, 56
; LMULMAX2-RV64-NEXT: or a1, a1, a5
; LMULMAX2-RV64-NEXT: or a1, a1, a4
; LMULMAX2-RV64-NEXT: or a1, a1, t0
; LMULMAX2-RV64-NEXT: vsetivli a4, 2, e64,m1,ta,mu
; LMULMAX2-RV64-NEXT: vmv.v.x v26, a1
; LMULMAX2-RV64-NEXT: vmv.x.s a1, v25
; LMULMAX2-RV64-NEXT: srli a4, a1, 24
; LMULMAX2-RV64-NEXT: and a4, a4, a6
; LMULMAX2-RV64-NEXT: srli a5, a1, 8
; LMULMAX2-RV64-NEXT: and a2, a5, a2
; LMULMAX2-RV64-NEXT: or a2, a2, a4
; LMULMAX2-RV64-NEXT: srli a4, a1, 40
; LMULMAX2-RV64-NEXT: and a4, a4, a7
; LMULMAX2-RV64-NEXT: srli a5, a1, 56
; LMULMAX2-RV64-NEXT: or a4, a4, a5
; LMULMAX2-RV64-NEXT: or a2, a2, a4
; LMULMAX2-RV64-NEXT: slli a4, a1, 8
; LMULMAX2-RV64-NEXT: and a4, a4, t1
; LMULMAX2-RV64-NEXT: slli a5, a1, 24
; LMULMAX2-RV64-NEXT: and a5, a5, t2
; LMULMAX2-RV64-NEXT: or a4, a5, a4
; LMULMAX2-RV64-NEXT: slli a5, a1, 40
; LMULMAX2-RV64-NEXT: and a3, a5, a3
; LMULMAX2-RV64-NEXT: slli a1, a1, 56
; LMULMAX2-RV64-NEXT: or a1, a1, a3
; LMULMAX2-RV64-NEXT: or a1, a1, a4
; LMULMAX2-RV64-NEXT: or a1, a1, a2
; LMULMAX2-RV64-NEXT: or t1, a1, t1
; LMULMAX2-RV64-NEXT: vsetivli a2, 1, e64,m1,ta,mu
; LMULMAX2-RV64-NEXT: vslidedown.vi v25, v25, 1
; LMULMAX2-RV64-NEXT: vmv.x.s a2, v25
; LMULMAX2-RV64-NEXT: srli a4, a2, 40
; LMULMAX2-RV64-NEXT: and a4, a4, a7
; LMULMAX2-RV64-NEXT: srli a1, a2, 56
; LMULMAX2-RV64-NEXT: or a1, a4, a1
; LMULMAX2-RV64-NEXT: srli a4, a2, 24
; LMULMAX2-RV64-NEXT: and a4, a4, a6
; LMULMAX2-RV64-NEXT: srli a5, a2, 8
; LMULMAX2-RV64-NEXT: and a5, a5, t0
; LMULMAX2-RV64-NEXT: or a4, a5, a4
; LMULMAX2-RV64-NEXT: or a1, a4, a1
; LMULMAX2-RV64-NEXT: slli a4, a2, 8
; LMULMAX2-RV64-NEXT: and a4, a4, t2
; LMULMAX2-RV64-NEXT: slli a5, a2, 24
; LMULMAX2-RV64-NEXT: and a5, a5, t3
; LMULMAX2-RV64-NEXT: or a4, a5, a4
; LMULMAX2-RV64-NEXT: slli a5, a2, 40
; LMULMAX2-RV64-NEXT: and a3, a5, a3
; LMULMAX2-RV64-NEXT: slli a2, a2, 56
; LMULMAX2-RV64-NEXT: or a2, a2, a3
; LMULMAX2-RV64-NEXT: or a2, a2, a4
; LMULMAX2-RV64-NEXT: or a1, a2, a1
; LMULMAX2-RV64-NEXT: vsetivli a2, 2, e64,m1,ta,mu
; LMULMAX2-RV64-NEXT: vmv.v.x v25, a1
; LMULMAX2-RV64-NEXT: vmv.s.x v25, t1
; LMULMAX2-RV64-NEXT: vse64.v v25, (a0)
; LMULMAX2-RV64-NEXT: vmv.s.x v26, a1
; LMULMAX2-RV64-NEXT: vse64.v v26, (a0)
; LMULMAX2-RV64-NEXT: ret
;
; LMULMAX1-RV32-LABEL: bswap_v2i64:
@ -743,64 +743,64 @@ define void @bswap_v2i64(<2 x i64>* %x, <2 x i64>* %y) {
; LMULMAX1-RV64: # %bb.0:
; LMULMAX1-RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu
; LMULMAX1-RV64-NEXT: vle64.v v25, (a0)
; LMULMAX1-RV64-NEXT: vmv.x.s a1, v25
; LMULMAX1-RV64-NEXT: vsetivli a1, 1, e64,m1,ta,mu
; LMULMAX1-RV64-NEXT: vslidedown.vi v26, v25, 1
; LMULMAX1-RV64-NEXT: vmv.x.s a1, v26
; LMULMAX1-RV64-NEXT: srli a2, a1, 40
; LMULMAX1-RV64-NEXT: lui a3, 16
; LMULMAX1-RV64-NEXT: addiw a7, a3, -256
; LMULMAX1-RV64-NEXT: and a2, a2, a7
; LMULMAX1-RV64-NEXT: srli a4, a1, 56
; LMULMAX1-RV64-NEXT: or a2, a2, a4
; LMULMAX1-RV64-NEXT: or t0, a2, a4
; LMULMAX1-RV64-NEXT: srli a4, a1, 24
; LMULMAX1-RV64-NEXT: lui a6, 4080
; LMULMAX1-RV64-NEXT: and a4, a4, a6
; LMULMAX1-RV64-NEXT: srli a5, a1, 8
; LMULMAX1-RV64-NEXT: addi a3, zero, 255
; LMULMAX1-RV64-NEXT: slli t0, a3, 24
; LMULMAX1-RV64-NEXT: and a5, a5, t0
; LMULMAX1-RV64-NEXT: slli a2, a3, 24
; LMULMAX1-RV64-NEXT: and a5, a5, a2
; LMULMAX1-RV64-NEXT: or a4, a5, a4
; LMULMAX1-RV64-NEXT: or t1, a4, a2
; LMULMAX1-RV64-NEXT: slli a4, a1, 8
; LMULMAX1-RV64-NEXT: slli t2, a3, 32
; LMULMAX1-RV64-NEXT: or t0, a4, t0
; LMULMAX1-RV64-NEXT: slli a5, a1, 8
; LMULMAX1-RV64-NEXT: slli t1, a3, 32
; LMULMAX1-RV64-NEXT: and a5, a5, t1
; LMULMAX1-RV64-NEXT: slli a4, a1, 24
; LMULMAX1-RV64-NEXT: slli t2, a3, 40
; LMULMAX1-RV64-NEXT: and a4, a4, t2
; LMULMAX1-RV64-NEXT: slli a2, a1, 24
; LMULMAX1-RV64-NEXT: slli t3, a3, 40
; LMULMAX1-RV64-NEXT: and a2, a2, t3
; LMULMAX1-RV64-NEXT: or a2, a2, a4
; LMULMAX1-RV64-NEXT: slli a4, a1, 40
; LMULMAX1-RV64-NEXT: or a4, a4, a5
; LMULMAX1-RV64-NEXT: slli a5, a1, 40
; LMULMAX1-RV64-NEXT: slli a3, a3, 48
; LMULMAX1-RV64-NEXT: and a4, a4, a3
; LMULMAX1-RV64-NEXT: and a5, a5, a3
; LMULMAX1-RV64-NEXT: slli a1, a1, 56
; LMULMAX1-RV64-NEXT: or a1, a1, a5
; LMULMAX1-RV64-NEXT: or a1, a1, a4
; LMULMAX1-RV64-NEXT: or a1, a1, t0
; LMULMAX1-RV64-NEXT: vsetivli a4, 2, e64,m1,ta,mu
; LMULMAX1-RV64-NEXT: vmv.v.x v26, a1
; LMULMAX1-RV64-NEXT: vmv.x.s a1, v25
; LMULMAX1-RV64-NEXT: srli a4, a1, 24
; LMULMAX1-RV64-NEXT: and a4, a4, a6
; LMULMAX1-RV64-NEXT: srli a5, a1, 8
; LMULMAX1-RV64-NEXT: and a2, a5, a2
; LMULMAX1-RV64-NEXT: or a2, a2, a4
; LMULMAX1-RV64-NEXT: srli a4, a1, 40
; LMULMAX1-RV64-NEXT: and a4, a4, a7
; LMULMAX1-RV64-NEXT: srli a5, a1, 56
; LMULMAX1-RV64-NEXT: or a4, a4, a5
; LMULMAX1-RV64-NEXT: or a2, a2, a4
; LMULMAX1-RV64-NEXT: slli a4, a1, 8
; LMULMAX1-RV64-NEXT: and a4, a4, t1
; LMULMAX1-RV64-NEXT: slli a5, a1, 24
; LMULMAX1-RV64-NEXT: and a5, a5, t2
; LMULMAX1-RV64-NEXT: or a4, a5, a4
; LMULMAX1-RV64-NEXT: slli a5, a1, 40
; LMULMAX1-RV64-NEXT: and a3, a5, a3
; LMULMAX1-RV64-NEXT: slli a1, a1, 56
; LMULMAX1-RV64-NEXT: or a1, a1, a3
; LMULMAX1-RV64-NEXT: or a1, a1, a4
; LMULMAX1-RV64-NEXT: or a1, a1, a2
; LMULMAX1-RV64-NEXT: or t1, a1, t1
; LMULMAX1-RV64-NEXT: vsetivli a2, 1, e64,m1,ta,mu
; LMULMAX1-RV64-NEXT: vslidedown.vi v25, v25, 1
; LMULMAX1-RV64-NEXT: vmv.x.s a2, v25
; LMULMAX1-RV64-NEXT: srli a4, a2, 40
; LMULMAX1-RV64-NEXT: and a4, a4, a7
; LMULMAX1-RV64-NEXT: srli a1, a2, 56
; LMULMAX1-RV64-NEXT: or a1, a4, a1
; LMULMAX1-RV64-NEXT: srli a4, a2, 24
; LMULMAX1-RV64-NEXT: and a4, a4, a6
; LMULMAX1-RV64-NEXT: srli a5, a2, 8
; LMULMAX1-RV64-NEXT: and a5, a5, t0
; LMULMAX1-RV64-NEXT: or a4, a5, a4
; LMULMAX1-RV64-NEXT: or a1, a4, a1
; LMULMAX1-RV64-NEXT: slli a4, a2, 8
; LMULMAX1-RV64-NEXT: and a4, a4, t2
; LMULMAX1-RV64-NEXT: slli a5, a2, 24
; LMULMAX1-RV64-NEXT: and a5, a5, t3
; LMULMAX1-RV64-NEXT: or a4, a5, a4
; LMULMAX1-RV64-NEXT: slli a5, a2, 40
; LMULMAX1-RV64-NEXT: and a3, a5, a3
; LMULMAX1-RV64-NEXT: slli a2, a2, 56
; LMULMAX1-RV64-NEXT: or a2, a2, a3
; LMULMAX1-RV64-NEXT: or a2, a2, a4
; LMULMAX1-RV64-NEXT: or a1, a2, a1
; LMULMAX1-RV64-NEXT: vsetivli a2, 2, e64,m1,ta,mu
; LMULMAX1-RV64-NEXT: vmv.v.x v25, a1
; LMULMAX1-RV64-NEXT: vmv.s.x v25, t1
; LMULMAX1-RV64-NEXT: vse64.v v25, (a0)
; LMULMAX1-RV64-NEXT: vmv.s.x v26, a1
; LMULMAX1-RV64-NEXT: vse64.v v26, (a0)
; LMULMAX1-RV64-NEXT: ret
%a = load <2 x i64>, <2 x i64>* %x
%b = load <2 x i64>, <2 x i64>* %y
@ -1959,108 +1959,108 @@ define void @bswap_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
; LMULMAX2-RV64-NEXT: andi sp, sp, -32
; LMULMAX2-RV64-NEXT: vsetivli a1, 4, e64,m2,ta,mu
; LMULMAX2-RV64-NEXT: vle64.v v26, (a0)
; LMULMAX2-RV64-NEXT: vmv.x.s a1, v26
; LMULMAX2-RV64-NEXT: srli a2, a1, 40
; LMULMAX2-RV64-NEXT: lui a3, 16
; LMULMAX2-RV64-NEXT: addiw a7, a3, -256
; LMULMAX2-RV64-NEXT: and a2, a2, a7
; LMULMAX2-RV64-NEXT: srli a3, a1, 56
; LMULMAX2-RV64-NEXT: or a2, a2, a3
; LMULMAX2-RV64-NEXT: srli a3, a1, 24
; LMULMAX2-RV64-NEXT: vmv.x.s a2, v26
; LMULMAX2-RV64-NEXT: srli a1, a2, 24
; LMULMAX2-RV64-NEXT: lui a6, 4080
; LMULMAX2-RV64-NEXT: and a4, a3, a6
; LMULMAX2-RV64-NEXT: srli a5, a1, 8
; LMULMAX2-RV64-NEXT: addi a3, zero, 255
; LMULMAX2-RV64-NEXT: slli t0, a3, 24
; LMULMAX2-RV64-NEXT: and a5, a5, t0
; LMULMAX2-RV64-NEXT: or a4, a5, a4
; LMULMAX2-RV64-NEXT: or a4, a4, a2
; LMULMAX2-RV64-NEXT: slli a2, a1, 8
; LMULMAX2-RV64-NEXT: slli t1, a3, 32
; LMULMAX2-RV64-NEXT: and a2, a2, t1
; LMULMAX2-RV64-NEXT: slli a5, a1, 24
; LMULMAX2-RV64-NEXT: slli t2, a3, 40
; LMULMAX2-RV64-NEXT: and a5, a5, t2
; LMULMAX2-RV64-NEXT: or a5, a5, a2
; LMULMAX2-RV64-NEXT: slli a2, a1, 40
; LMULMAX2-RV64-NEXT: slli a3, a3, 48
; LMULMAX2-RV64-NEXT: and a2, a2, a3
; LMULMAX2-RV64-NEXT: slli a1, a1, 56
; LMULMAX2-RV64-NEXT: or a1, a1, a2
; LMULMAX2-RV64-NEXT: or a1, a1, a5
; LMULMAX2-RV64-NEXT: or a1, a1, a4
; LMULMAX2-RV64-NEXT: and a1, a1, a6
; LMULMAX2-RV64-NEXT: srli a3, a2, 8
; LMULMAX2-RV64-NEXT: addi a5, zero, 255
; LMULMAX2-RV64-NEXT: slli a7, a5, 24
; LMULMAX2-RV64-NEXT: and a3, a3, a7
; LMULMAX2-RV64-NEXT: or a3, a3, a1
; LMULMAX2-RV64-NEXT: srli a4, a2, 40
; LMULMAX2-RV64-NEXT: lui a1, 16
; LMULMAX2-RV64-NEXT: addiw t0, a1, -256
; LMULMAX2-RV64-NEXT: and a4, a4, t0
; LMULMAX2-RV64-NEXT: srli a1, a2, 56
; LMULMAX2-RV64-NEXT: or a1, a4, a1
; LMULMAX2-RV64-NEXT: or a1, a3, a1
; LMULMAX2-RV64-NEXT: slli a4, a2, 8
; LMULMAX2-RV64-NEXT: slli t1, a5, 32
; LMULMAX2-RV64-NEXT: and a3, a4, t1
; LMULMAX2-RV64-NEXT: slli a4, a2, 24
; LMULMAX2-RV64-NEXT: slli t2, a5, 40
; LMULMAX2-RV64-NEXT: and a4, a4, t2
; LMULMAX2-RV64-NEXT: or a3, a4, a3
; LMULMAX2-RV64-NEXT: slli a4, a2, 40
; LMULMAX2-RV64-NEXT: slli a5, a5, 48
; LMULMAX2-RV64-NEXT: and a4, a4, a5
; LMULMAX2-RV64-NEXT: slli a2, a2, 56
; LMULMAX2-RV64-NEXT: or a2, a2, a4
; LMULMAX2-RV64-NEXT: or a2, a2, a3
; LMULMAX2-RV64-NEXT: or a1, a2, a1
; LMULMAX2-RV64-NEXT: sd a1, 0(sp)
; LMULMAX2-RV64-NEXT: vsetivli a1, 1, e64,m2,ta,mu
; LMULMAX2-RV64-NEXT: vslidedown.vi v28, v26, 3
; LMULMAX2-RV64-NEXT: vmv.x.s a1, v28
; LMULMAX2-RV64-NEXT: srli a2, a1, 40
; LMULMAX2-RV64-NEXT: and a2, a2, a7
; LMULMAX2-RV64-NEXT: srli a4, a1, 56
; LMULMAX2-RV64-NEXT: or a2, a2, a4
; LMULMAX2-RV64-NEXT: srli a4, a1, 24
; LMULMAX2-RV64-NEXT: and a4, a4, a6
; LMULMAX2-RV64-NEXT: srli a5, a1, 8
; LMULMAX2-RV64-NEXT: and a5, a5, t0
; LMULMAX2-RV64-NEXT: or a4, a5, a4
; LMULMAX2-RV64-NEXT: or a2, a4, a2
; LMULMAX2-RV64-NEXT: slli a4, a1, 8
; LMULMAX2-RV64-NEXT: and a4, a4, t1
; LMULMAX2-RV64-NEXT: slli a5, a1, 24
; LMULMAX2-RV64-NEXT: and a5, a5, t2
; LMULMAX2-RV64-NEXT: or a4, a5, a4
; LMULMAX2-RV64-NEXT: slli a5, a1, 40
; LMULMAX2-RV64-NEXT: and a5, a5, a3
; LMULMAX2-RV64-NEXT: and a2, a2, t0
; LMULMAX2-RV64-NEXT: srli a3, a1, 56
; LMULMAX2-RV64-NEXT: or a2, a2, a3
; LMULMAX2-RV64-NEXT: srli a3, a1, 24
; LMULMAX2-RV64-NEXT: and a3, a3, a6
; LMULMAX2-RV64-NEXT: srli a4, a1, 8
; LMULMAX2-RV64-NEXT: and a4, a4, a7
; LMULMAX2-RV64-NEXT: or a3, a4, a3
; LMULMAX2-RV64-NEXT: or a2, a3, a2
; LMULMAX2-RV64-NEXT: slli a3, a1, 8
; LMULMAX2-RV64-NEXT: and a3, a3, t1
; LMULMAX2-RV64-NEXT: slli a4, a1, 24
; LMULMAX2-RV64-NEXT: and a4, a4, t2
; LMULMAX2-RV64-NEXT: or a3, a4, a3
; LMULMAX2-RV64-NEXT: slli a4, a1, 40
; LMULMAX2-RV64-NEXT: and a4, a4, a5
; LMULMAX2-RV64-NEXT: slli a1, a1, 56
; LMULMAX2-RV64-NEXT: or a1, a1, a5
; LMULMAX2-RV64-NEXT: or a1, a1, a4
; LMULMAX2-RV64-NEXT: or a1, a1, a3
; LMULMAX2-RV64-NEXT: or a1, a1, a2
; LMULMAX2-RV64-NEXT: sd a1, 24(sp)
; LMULMAX2-RV64-NEXT: vslidedown.vi v28, v26, 2
; LMULMAX2-RV64-NEXT: vmv.x.s a1, v28
; LMULMAX2-RV64-NEXT: srli a2, a1, 40
; LMULMAX2-RV64-NEXT: and a2, a2, a7
; LMULMAX2-RV64-NEXT: srli a4, a1, 56
; LMULMAX2-RV64-NEXT: or a2, a2, a4
; LMULMAX2-RV64-NEXT: srli a4, a1, 24
; LMULMAX2-RV64-NEXT: and a4, a4, a6
; LMULMAX2-RV64-NEXT: srli a5, a1, 8
; LMULMAX2-RV64-NEXT: and a5, a5, t0
; LMULMAX2-RV64-NEXT: or a4, a5, a4
; LMULMAX2-RV64-NEXT: or a2, a4, a2
; LMULMAX2-RV64-NEXT: slli a4, a1, 8
; LMULMAX2-RV64-NEXT: and a4, a4, t1
; LMULMAX2-RV64-NEXT: slli a5, a1, 24
; LMULMAX2-RV64-NEXT: and a5, a5, t2
; LMULMAX2-RV64-NEXT: or a4, a5, a4
; LMULMAX2-RV64-NEXT: slli a5, a1, 40
; LMULMAX2-RV64-NEXT: and a5, a5, a3
; LMULMAX2-RV64-NEXT: and a2, a2, t0
; LMULMAX2-RV64-NEXT: srli a3, a1, 56
; LMULMAX2-RV64-NEXT: or a2, a2, a3
; LMULMAX2-RV64-NEXT: srli a3, a1, 24
; LMULMAX2-RV64-NEXT: and a3, a3, a6
; LMULMAX2-RV64-NEXT: srli a4, a1, 8
; LMULMAX2-RV64-NEXT: and a4, a4, a7
; LMULMAX2-RV64-NEXT: or a3, a4, a3
; LMULMAX2-RV64-NEXT: or a2, a3, a2
; LMULMAX2-RV64-NEXT: slli a3, a1, 8
; LMULMAX2-RV64-NEXT: and a3, a3, t1
; LMULMAX2-RV64-NEXT: slli a4, a1, 24
; LMULMAX2-RV64-NEXT: and a4, a4, t2
; LMULMAX2-RV64-NEXT: or a3, a4, a3
; LMULMAX2-RV64-NEXT: slli a4, a1, 40
; LMULMAX2-RV64-NEXT: and a4, a4, a5
; LMULMAX2-RV64-NEXT: slli a1, a1, 56
; LMULMAX2-RV64-NEXT: or a1, a1, a5
; LMULMAX2-RV64-NEXT: or a1, a1, a4
; LMULMAX2-RV64-NEXT: or a1, a1, a3
; LMULMAX2-RV64-NEXT: or a1, a1, a2
; LMULMAX2-RV64-NEXT: sd a1, 16(sp)
; LMULMAX2-RV64-NEXT: vslidedown.vi v26, v26, 1
; LMULMAX2-RV64-NEXT: vmv.x.s a1, v26
; LMULMAX2-RV64-NEXT: srli a2, a1, 40
; LMULMAX2-RV64-NEXT: and a2, a2, a7
; LMULMAX2-RV64-NEXT: srli a4, a1, 56
; LMULMAX2-RV64-NEXT: or a2, a2, a4
; LMULMAX2-RV64-NEXT: srli a4, a1, 24
; LMULMAX2-RV64-NEXT: and a4, a4, a6
; LMULMAX2-RV64-NEXT: srli a5, a1, 8
; LMULMAX2-RV64-NEXT: and a5, a5, t0
; LMULMAX2-RV64-NEXT: or a4, a5, a4
; LMULMAX2-RV64-NEXT: or a2, a4, a2
; LMULMAX2-RV64-NEXT: slli a4, a1, 8
; LMULMAX2-RV64-NEXT: and a4, a4, t1
; LMULMAX2-RV64-NEXT: slli a5, a1, 24
; LMULMAX2-RV64-NEXT: and a5, a5, t2
; LMULMAX2-RV64-NEXT: or a4, a5, a4
; LMULMAX2-RV64-NEXT: slli a5, a1, 40
; LMULMAX2-RV64-NEXT: and a3, a5, a3
; LMULMAX2-RV64-NEXT: and a2, a2, t0
; LMULMAX2-RV64-NEXT: srli a3, a1, 56
; LMULMAX2-RV64-NEXT: or a2, a2, a3
; LMULMAX2-RV64-NEXT: srli a3, a1, 24
; LMULMAX2-RV64-NEXT: and a3, a3, a6
; LMULMAX2-RV64-NEXT: srli a4, a1, 8
; LMULMAX2-RV64-NEXT: and a4, a4, a7
; LMULMAX2-RV64-NEXT: or a3, a4, a3
; LMULMAX2-RV64-NEXT: or a2, a3, a2
; LMULMAX2-RV64-NEXT: slli a3, a1, 8
; LMULMAX2-RV64-NEXT: and a3, a3, t1
; LMULMAX2-RV64-NEXT: slli a4, a1, 24
; LMULMAX2-RV64-NEXT: and a4, a4, t2
; LMULMAX2-RV64-NEXT: or a3, a4, a3
; LMULMAX2-RV64-NEXT: slli a4, a1, 40
; LMULMAX2-RV64-NEXT: and a4, a4, a5
; LMULMAX2-RV64-NEXT: slli a1, a1, 56
; LMULMAX2-RV64-NEXT: or a1, a1, a3
; LMULMAX2-RV64-NEXT: or a1, a1, a4
; LMULMAX2-RV64-NEXT: or a1, a1, a3
; LMULMAX2-RV64-NEXT: or a1, a1, a2
; LMULMAX2-RV64-NEXT: sd a1, 8(sp)
; LMULMAX2-RV64-NEXT: vsetivli a1, 4, e64,m2,ta,mu
@ -2193,8 +2193,10 @@ define void @bswap_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
; LMULMAX1-RV64: # %bb.0:
; LMULMAX1-RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu
; LMULMAX1-RV64-NEXT: addi a6, a0, 16
; LMULMAX1-RV64-NEXT: vle64.v v26, (a6)
; LMULMAX1-RV64-NEXT: vle64.v v27, (a6)
; LMULMAX1-RV64-NEXT: vle64.v v25, (a0)
; LMULMAX1-RV64-NEXT: vsetivli a1, 1, e64,m1,ta,mu
; LMULMAX1-RV64-NEXT: vslidedown.vi v26, v27, 1
; LMULMAX1-RV64-NEXT: vmv.x.s a2, v26
; LMULMAX1-RV64-NEXT: srli a1, a2, 40
; LMULMAX1-RV64-NEXT: lui a3, 16
@ -2210,49 +2212,49 @@ define void @bswap_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
; LMULMAX1-RV64-NEXT: slli t1, a4, 24
; LMULMAX1-RV64-NEXT: and a5, a5, t1
; LMULMAX1-RV64-NEXT: or a3, a5, a3
; LMULMAX1-RV64-NEXT: or a5, a3, a1
; LMULMAX1-RV64-NEXT: or a3, a3, a1
; LMULMAX1-RV64-NEXT: slli a1, a2, 8
; LMULMAX1-RV64-NEXT: slli t2, a4, 32
; LMULMAX1-RV64-NEXT: and a3, a1, t2
; LMULMAX1-RV64-NEXT: slli a1, a2, 24
; LMULMAX1-RV64-NEXT: and a1, a1, t2
; LMULMAX1-RV64-NEXT: slli a5, a2, 24
; LMULMAX1-RV64-NEXT: slli t3, a4, 40
; LMULMAX1-RV64-NEXT: and a1, a1, t3
; LMULMAX1-RV64-NEXT: or a1, a1, a3
; LMULMAX1-RV64-NEXT: slli a3, a2, 40
; LMULMAX1-RV64-NEXT: and a5, a5, t3
; LMULMAX1-RV64-NEXT: or a5, a5, a1
; LMULMAX1-RV64-NEXT: slli a1, a2, 40
; LMULMAX1-RV64-NEXT: slli a4, a4, 48
; LMULMAX1-RV64-NEXT: and a3, a3, a4
; LMULMAX1-RV64-NEXT: and a1, a1, a4
; LMULMAX1-RV64-NEXT: slli a2, a2, 56
; LMULMAX1-RV64-NEXT: or a2, a2, a3
; LMULMAX1-RV64-NEXT: or a1, a2, a1
; LMULMAX1-RV64-NEXT: or t4, a1, a5
; LMULMAX1-RV64-NEXT: vsetivli a2, 1, e64,m1,ta,mu
; LMULMAX1-RV64-NEXT: vslidedown.vi v26, v26, 1
; LMULMAX1-RV64-NEXT: vmv.x.s a2, v26
; LMULMAX1-RV64-NEXT: srli a3, a2, 40
; LMULMAX1-RV64-NEXT: and a3, a3, t0
; LMULMAX1-RV64-NEXT: srli a5, a2, 56
; LMULMAX1-RV64-NEXT: or a3, a3, a5
; LMULMAX1-RV64-NEXT: srli a5, a2, 24
; LMULMAX1-RV64-NEXT: and a5, a5, a7
; LMULMAX1-RV64-NEXT: srli a1, a2, 8
; LMULMAX1-RV64-NEXT: and a1, a1, t1
; LMULMAX1-RV64-NEXT: or a1, a1, a5
; LMULMAX1-RV64-NEXT: or a1, a1, a3
; LMULMAX1-RV64-NEXT: slli a3, a2, 8
; LMULMAX1-RV64-NEXT: and a3, a3, t2
; LMULMAX1-RV64-NEXT: slli a5, a2, 24
; LMULMAX1-RV64-NEXT: and a5, a5, t3
; LMULMAX1-RV64-NEXT: or a3, a5, a3
; LMULMAX1-RV64-NEXT: slli a5, a2, 40
; LMULMAX1-RV64-NEXT: and a5, a5, a4
; LMULMAX1-RV64-NEXT: slli a2, a2, 56
; LMULMAX1-RV64-NEXT: or a2, a2, a5
; LMULMAX1-RV64-NEXT: or a2, a2, a3
; LMULMAX1-RV64-NEXT: or a1, a2, a1
; LMULMAX1-RV64-NEXT: vsetivli a2, 2, e64,m1,ta,mu
; LMULMAX1-RV64-NEXT: vmv.v.x v26, a1
; LMULMAX1-RV64-NEXT: vmv.s.x v26, t4
; LMULMAX1-RV64-NEXT: vmv.x.s a1, v25
; LMULMAX1-RV64-NEXT: vmv.x.s a1, v27
; LMULMAX1-RV64-NEXT: srli a2, a1, 24
; LMULMAX1-RV64-NEXT: and a2, a2, a7
; LMULMAX1-RV64-NEXT: srli a3, a1, 8
; LMULMAX1-RV64-NEXT: and a3, a3, t1
; LMULMAX1-RV64-NEXT: or a2, a3, a2
; LMULMAX1-RV64-NEXT: srli a3, a1, 40
; LMULMAX1-RV64-NEXT: and a3, a3, t0
; LMULMAX1-RV64-NEXT: srli a5, a1, 56
; LMULMAX1-RV64-NEXT: or a3, a3, a5
; LMULMAX1-RV64-NEXT: or a2, a2, a3
; LMULMAX1-RV64-NEXT: slli a3, a1, 8
; LMULMAX1-RV64-NEXT: and a3, a3, t2
; LMULMAX1-RV64-NEXT: slli a5, a1, 24
; LMULMAX1-RV64-NEXT: and a5, a5, t3
; LMULMAX1-RV64-NEXT: or a3, a5, a3
; LMULMAX1-RV64-NEXT: slli a5, a1, 40
; LMULMAX1-RV64-NEXT: and a5, a5, a4
; LMULMAX1-RV64-NEXT: slli a1, a1, 56
; LMULMAX1-RV64-NEXT: or a1, a1, a5
; LMULMAX1-RV64-NEXT: or a1, a1, a3
; LMULMAX1-RV64-NEXT: or a1, a1, a2
; LMULMAX1-RV64-NEXT: vmv.s.x v26, a1
; LMULMAX1-RV64-NEXT: vsetivli a1, 1, e64,m1,ta,mu
; LMULMAX1-RV64-NEXT: vslidedown.vi v27, v25, 1
; LMULMAX1-RV64-NEXT: vmv.x.s a1, v27
; LMULMAX1-RV64-NEXT: srli a2, a1, 40
; LMULMAX1-RV64-NEXT: and a2, a2, t0
; LMULMAX1-RV64-NEXT: srli a3, a1, 56
@ -2273,35 +2275,33 @@ define void @bswap_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
; LMULMAX1-RV64-NEXT: slli a1, a1, 56
; LMULMAX1-RV64-NEXT: or a1, a1, a5
; LMULMAX1-RV64-NEXT: or a1, a1, a3
; LMULMAX1-RV64-NEXT: or t4, a1, a2
; LMULMAX1-RV64-NEXT: vsetivli a2, 1, e64,m1,ta,mu
; LMULMAX1-RV64-NEXT: vslidedown.vi v25, v25, 1
; LMULMAX1-RV64-NEXT: vmv.x.s a2, v25
; LMULMAX1-RV64-NEXT: srli a3, a2, 40
; LMULMAX1-RV64-NEXT: or a1, a1, a2
; LMULMAX1-RV64-NEXT: vsetivli a2, 2, e64,m1,ta,mu
; LMULMAX1-RV64-NEXT: vmv.v.x v27, a1
; LMULMAX1-RV64-NEXT: vmv.x.s a1, v25
; LMULMAX1-RV64-NEXT: srli a2, a1, 24
; LMULMAX1-RV64-NEXT: and a2, a2, a7
; LMULMAX1-RV64-NEXT: srli a3, a1, 8
; LMULMAX1-RV64-NEXT: and a3, a3, t1
; LMULMAX1-RV64-NEXT: or a2, a3, a2
; LMULMAX1-RV64-NEXT: srli a3, a1, 40
; LMULMAX1-RV64-NEXT: and a3, a3, t0
; LMULMAX1-RV64-NEXT: srli a5, a2, 56
; LMULMAX1-RV64-NEXT: srli a5, a1, 56
; LMULMAX1-RV64-NEXT: or a3, a3, a5
; LMULMAX1-RV64-NEXT: srli a5, a2, 24
; LMULMAX1-RV64-NEXT: and a5, a5, a7
; LMULMAX1-RV64-NEXT: srli a1, a2, 8
; LMULMAX1-RV64-NEXT: and a1, a1, t1
; LMULMAX1-RV64-NEXT: or a1, a1, a5
; LMULMAX1-RV64-NEXT: or a1, a1, a3
; LMULMAX1-RV64-NEXT: slli a3, a2, 8
; LMULMAX1-RV64-NEXT: or a2, a2, a3
; LMULMAX1-RV64-NEXT: slli a3, a1, 8
; LMULMAX1-RV64-NEXT: and a3, a3, t2
; LMULMAX1-RV64-NEXT: slli a5, a2, 24
; LMULMAX1-RV64-NEXT: slli a5, a1, 24
; LMULMAX1-RV64-NEXT: and a5, a5, t3
; LMULMAX1-RV64-NEXT: or a3, a5, a3
; LMULMAX1-RV64-NEXT: slli a5, a2, 40
; LMULMAX1-RV64-NEXT: slli a5, a1, 40
; LMULMAX1-RV64-NEXT: and a4, a5, a4
; LMULMAX1-RV64-NEXT: slli a2, a2, 56
; LMULMAX1-RV64-NEXT: or a2, a2, a4
; LMULMAX1-RV64-NEXT: or a2, a2, a3
; LMULMAX1-RV64-NEXT: or a1, a2, a1
; LMULMAX1-RV64-NEXT: vsetivli a2, 2, e64,m1,ta,mu
; LMULMAX1-RV64-NEXT: vmv.v.x v25, a1
; LMULMAX1-RV64-NEXT: vmv.s.x v25, t4
; LMULMAX1-RV64-NEXT: vse64.v v25, (a0)
; LMULMAX1-RV64-NEXT: slli a1, a1, 56
; LMULMAX1-RV64-NEXT: or a1, a1, a4
; LMULMAX1-RV64-NEXT: or a1, a1, a3
; LMULMAX1-RV64-NEXT: or a1, a1, a2
; LMULMAX1-RV64-NEXT: vmv.s.x v27, a1
; LMULMAX1-RV64-NEXT: vse64.v v27, (a0)
; LMULMAX1-RV64-NEXT: vse64.v v26, (a6)
; LMULMAX1-RV64-NEXT: ret
%a = load <4 x i64>, <4 x i64>* %x


@ -900,27 +900,27 @@ define <32 x i32> @ret_v32i32_call_v32i32_v32i32_v32i32_i32(<32 x i32> %x, <32 x
; LMULMAX1-NEXT: vle32.v v31, (a1)
; LMULMAX1-NEXT: addi a0, a0, 112
; LMULMAX1-NEXT: vle32.v v24, (a0)
; LMULMAX1-NEXT: ld a1, 0(s0)
; LMULMAX1-NEXT: addi a0, sp, 240
; LMULMAX1-NEXT: vse32.v v15, (a0)
; LMULMAX1-NEXT: addi a0, sp, 224
; LMULMAX1-NEXT: vse32.v v14, (a0)
; LMULMAX1-NEXT: addi a0, sp, 208
; LMULMAX1-NEXT: vse32.v v13, (a0)
; LMULMAX1-NEXT: addi a0, sp, 192
; LMULMAX1-NEXT: vse32.v v12, (a0)
; LMULMAX1-NEXT: addi a0, sp, 176
; LMULMAX1-NEXT: vse32.v v11, (a0)
; LMULMAX1-NEXT: addi a0, sp, 160
; LMULMAX1-NEXT: vse32.v v10, (a0)
; LMULMAX1-NEXT: addi a0, sp, 144
; LMULMAX1-NEXT: vse32.v v9, (a0)
; LMULMAX1-NEXT: ld a0, 0(s0)
; LMULMAX1-NEXT: addi a1, sp, 240
; LMULMAX1-NEXT: vse32.v v15, (a1)
; LMULMAX1-NEXT: addi a1, sp, 224
; LMULMAX1-NEXT: vse32.v v14, (a1)
; LMULMAX1-NEXT: addi a1, sp, 208
; LMULMAX1-NEXT: vse32.v v13, (a1)
; LMULMAX1-NEXT: addi a1, sp, 192
; LMULMAX1-NEXT: vse32.v v12, (a1)
; LMULMAX1-NEXT: addi a1, sp, 176
; LMULMAX1-NEXT: vse32.v v11, (a1)
; LMULMAX1-NEXT: addi a1, sp, 160
; LMULMAX1-NEXT: vse32.v v10, (a1)
; LMULMAX1-NEXT: addi a1, sp, 144
; LMULMAX1-NEXT: vse32.v v9, (a1)
; LMULMAX1-NEXT: addi a1, zero, 42
; LMULMAX1-NEXT: sd a1, 8(sp)
; LMULMAX1-NEXT: sd a0, 0(sp)
; LMULMAX1-NEXT: addi a0, sp, 128
; LMULMAX1-NEXT: vse32.v v8, (a0)
; LMULMAX1-NEXT: addi a0, zero, 42
; LMULMAX1-NEXT: sd a0, 8(sp)
; LMULMAX1-NEXT: addi a0, sp, 128
; LMULMAX1-NEXT: sd a1, 0(sp)
; LMULMAX1-NEXT: addi a1, sp, 128
; LMULMAX1-NEXT: vse32.v v8, (a1)
; LMULMAX1-NEXT: vmv1r.v v8, v25
; LMULMAX1-NEXT: vmv1r.v v9, v26
; LMULMAX1-NEXT: vmv1r.v v10, v27


@ -3657,23 +3657,23 @@ define void @ctlz_v2i64(<2 x i64>* %x, <2 x i64>* %y) {
; LMULMAX2-RV32-NEXT: vle64.v v25, (a0)
; LMULMAX2-RV32-NEXT: sw zero, 12(sp)
; LMULMAX2-RV32-NEXT: sw zero, 4(sp)
; LMULMAX2-RV32-NEXT: vmv.x.s a5, v25
; LMULMAX2-RV32-NEXT: addi a6, zero, 32
; LMULMAX2-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu
; LMULMAX2-RV32-NEXT: vsrl.vx v26, v25, a6
; LMULMAX2-RV32-NEXT: vmv.x.s a5, v26
; LMULMAX2-RV32-NEXT: lui a1, 349525
; LMULMAX2-RV32-NEXT: addi a4, a1, 1365
; LMULMAX2-RV32-NEXT: lui a1, 209715
; LMULMAX2-RV32-NEXT: addi a3, a1, 819
; LMULMAX2-RV32-NEXT: lui a1, 61681
; LMULMAX2-RV32-NEXT: addi a7, a1, -241
; LMULMAX2-RV32-NEXT: lui a2, 4112
; LMULMAX2-RV32-NEXT: addi a6, zero, 32
; LMULMAX2-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu
; LMULMAX2-RV32-NEXT: vsrl.vx v26, v25, a6
; LMULMAX2-RV32-NEXT: vmv.x.s a1, v26
; LMULMAX2-RV32-NEXT: addi a2, a2, 257
; LMULMAX2-RV32-NEXT: bnez a1, .LBB3_2
; LMULMAX2-RV32-NEXT: lui a1, 4112
; LMULMAX2-RV32-NEXT: addi a2, a1, 257
; LMULMAX2-RV32-NEXT: vmv.x.s a1, v25
; LMULMAX2-RV32-NEXT: bnez a5, .LBB3_2
; LMULMAX2-RV32-NEXT: # %bb.1:
; LMULMAX2-RV32-NEXT: srli a1, a5, 1
; LMULMAX2-RV32-NEXT: or a1, a5, a1
; LMULMAX2-RV32-NEXT: srli a5, a1, 1
; LMULMAX2-RV32-NEXT: or a1, a1, a5
; LMULMAX2-RV32-NEXT: srli a5, a1, 2
; LMULMAX2-RV32-NEXT: or a1, a1, a5
; LMULMAX2-RV32-NEXT: srli a5, a1, 4
@ -3698,8 +3698,8 @@ define void @ctlz_v2i64(<2 x i64>* %x, <2 x i64>* %y) {
; LMULMAX2-RV32-NEXT: addi a5, a1, 32
; LMULMAX2-RV32-NEXT: j .LBB3_3
; LMULMAX2-RV32-NEXT: .LBB3_2:
; LMULMAX2-RV32-NEXT: srli a5, a1, 1
; LMULMAX2-RV32-NEXT: or a1, a1, a5
; LMULMAX2-RV32-NEXT: srli a1, a5, 1
; LMULMAX2-RV32-NEXT: or a1, a5, a1
; LMULMAX2-RV32-NEXT: srli a5, a1, 2
; LMULMAX2-RV32-NEXT: or a1, a1, a5
; LMULMAX2-RV32-NEXT: srli a5, a1, 4
@ -3890,23 +3890,23 @@ define void @ctlz_v2i64(<2 x i64>* %x, <2 x i64>* %y) {
; LMULMAX1-RV32-NEXT: vle64.v v25, (a0)
; LMULMAX1-RV32-NEXT: sw zero, 12(sp)
; LMULMAX1-RV32-NEXT: sw zero, 4(sp)
; LMULMAX1-RV32-NEXT: vmv.x.s a5, v25
; LMULMAX1-RV32-NEXT: addi a6, zero, 32
; LMULMAX1-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu
; LMULMAX1-RV32-NEXT: vsrl.vx v26, v25, a6
; LMULMAX1-RV32-NEXT: vmv.x.s a5, v26
; LMULMAX1-RV32-NEXT: lui a1, 349525
; LMULMAX1-RV32-NEXT: addi a4, a1, 1365
; LMULMAX1-RV32-NEXT: lui a1, 209715
; LMULMAX1-RV32-NEXT: addi a3, a1, 819
; LMULMAX1-RV32-NEXT: lui a1, 61681
; LMULMAX1-RV32-NEXT: addi a7, a1, -241
; LMULMAX1-RV32-NEXT: lui a2, 4112
; LMULMAX1-RV32-NEXT: addi a6, zero, 32
; LMULMAX1-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu
; LMULMAX1-RV32-NEXT: vsrl.vx v26, v25, a6
; LMULMAX1-RV32-NEXT: vmv.x.s a1, v26
; LMULMAX1-RV32-NEXT: addi a2, a2, 257
; LMULMAX1-RV32-NEXT: bnez a1, .LBB3_2
; LMULMAX1-RV32-NEXT: lui a1, 4112
; LMULMAX1-RV32-NEXT: addi a2, a1, 257
; LMULMAX1-RV32-NEXT: vmv.x.s a1, v25
; LMULMAX1-RV32-NEXT: bnez a5, .LBB3_2
; LMULMAX1-RV32-NEXT: # %bb.1:
; LMULMAX1-RV32-NEXT: srli a1, a5, 1
; LMULMAX1-RV32-NEXT: or a1, a5, a1
; LMULMAX1-RV32-NEXT: srli a5, a1, 1
; LMULMAX1-RV32-NEXT: or a1, a1, a5
; LMULMAX1-RV32-NEXT: srli a5, a1, 2
; LMULMAX1-RV32-NEXT: or a1, a1, a5
; LMULMAX1-RV32-NEXT: srli a5, a1, 4
@ -3931,8 +3931,8 @@ define void @ctlz_v2i64(<2 x i64>* %x, <2 x i64>* %y) {
; LMULMAX1-RV32-NEXT: addi a5, a1, 32
; LMULMAX1-RV32-NEXT: j .LBB3_3
; LMULMAX1-RV32-NEXT: .LBB3_2:
; LMULMAX1-RV32-NEXT: srli a5, a1, 1
; LMULMAX1-RV32-NEXT: or a1, a1, a5
; LMULMAX1-RV32-NEXT: srli a1, a5, 1
; LMULMAX1-RV32-NEXT: or a1, a5, a1
; LMULMAX1-RV32-NEXT: srli a5, a1, 2
; LMULMAX1-RV32-NEXT: or a1, a1, a5
; LMULMAX1-RV32-NEXT: srli a5, a1, 4
@ -11110,23 +11110,23 @@ define void @ctlz_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
; LMULMAX2-RV32-NEXT: sw zero, 20(sp)
; LMULMAX2-RV32-NEXT: sw zero, 12(sp)
; LMULMAX2-RV32-NEXT: sw zero, 4(sp)
; LMULMAX2-RV32-NEXT: vmv.x.s a5, v26
; LMULMAX2-RV32-NEXT: addi a6, zero, 32
; LMULMAX2-RV32-NEXT: vsetivli a1, 1, e64,m2,ta,mu
; LMULMAX2-RV32-NEXT: vsrl.vx v28, v26, a6
; LMULMAX2-RV32-NEXT: vmv.x.s a5, v28
; LMULMAX2-RV32-NEXT: lui a1, 349525
; LMULMAX2-RV32-NEXT: addi a4, a1, 1365
; LMULMAX2-RV32-NEXT: lui a1, 209715
; LMULMAX2-RV32-NEXT: addi a3, a1, 819
; LMULMAX2-RV32-NEXT: lui a1, 61681
; LMULMAX2-RV32-NEXT: addi a7, a1, -241
; LMULMAX2-RV32-NEXT: lui a2, 4112
; LMULMAX2-RV32-NEXT: addi a6, zero, 32
; LMULMAX2-RV32-NEXT: vsetivli a1, 1, e64,m2,ta,mu
; LMULMAX2-RV32-NEXT: vsrl.vx v28, v26, a6
; LMULMAX2-RV32-NEXT: vmv.x.s a1, v28
; LMULMAX2-RV32-NEXT: addi a2, a2, 257
; LMULMAX2-RV32-NEXT: bnez a1, .LBB7_2
; LMULMAX2-RV32-NEXT: lui a1, 4112
; LMULMAX2-RV32-NEXT: addi a2, a1, 257
; LMULMAX2-RV32-NEXT: vmv.x.s a1, v26
; LMULMAX2-RV32-NEXT: bnez a5, .LBB7_2
; LMULMAX2-RV32-NEXT: # %bb.1:
; LMULMAX2-RV32-NEXT: srli a1, a5, 1
; LMULMAX2-RV32-NEXT: or a1, a5, a1
; LMULMAX2-RV32-NEXT: srli a5, a1, 1
; LMULMAX2-RV32-NEXT: or a1, a1, a5
; LMULMAX2-RV32-NEXT: srli a5, a1, 2
; LMULMAX2-RV32-NEXT: or a1, a1, a5
; LMULMAX2-RV32-NEXT: srli a5, a1, 4
@ -11151,8 +11151,8 @@ define void @ctlz_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
; LMULMAX2-RV32-NEXT: addi a5, a1, 32
; LMULMAX2-RV32-NEXT: j .LBB7_3
; LMULMAX2-RV32-NEXT: .LBB7_2:
; LMULMAX2-RV32-NEXT: srli a5, a1, 1
; LMULMAX2-RV32-NEXT: or a1, a1, a5
; LMULMAX2-RV32-NEXT: srli a1, a5, 1
; LMULMAX2-RV32-NEXT: or a1, a5, a1
; LMULMAX2-RV32-NEXT: srli a5, a1, 2
; LMULMAX2-RV32-NEXT: or a1, a1, a5
; LMULMAX2-RV32-NEXT: srli a5, a1, 4
@ -11529,28 +11529,28 @@ define void @ctlz_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
; LMULMAX1-RV32-NEXT: addi sp, sp, -32
; LMULMAX1-RV32-NEXT: .cfi_def_cfa_offset 32
; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
; LMULMAX1-RV32-NEXT: vle64.v v25, (a0)
; LMULMAX1-RV32-NEXT: addi a6, a0, 16
; LMULMAX1-RV32-NEXT: vle64.v v26, (a6)
; LMULMAX1-RV32-NEXT: vle64.v v25, (a0)
; LMULMAX1-RV32-NEXT: sw zero, 28(sp)
; LMULMAX1-RV32-NEXT: sw zero, 20(sp)
; LMULMAX1-RV32-NEXT: vmv.x.s a1, v26
; LMULMAX1-RV32-NEXT: addi a7, zero, 32
; LMULMAX1-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu
; LMULMAX1-RV32-NEXT: vsrl.vx v27, v26, a7
; LMULMAX1-RV32-NEXT: vmv.x.s a1, v27
; LMULMAX1-RV32-NEXT: lui a2, 349525
; LMULMAX1-RV32-NEXT: addi a5, a2, 1365
; LMULMAX1-RV32-NEXT: lui a2, 209715
; LMULMAX1-RV32-NEXT: addi a4, a2, 819
; LMULMAX1-RV32-NEXT: lui a2, 61681
; LMULMAX1-RV32-NEXT: addi t0, a2, -241
; LMULMAX1-RV32-NEXT: lui a3, 4112
; LMULMAX1-RV32-NEXT: addi a7, zero, 32
; LMULMAX1-RV32-NEXT: vsetivli a2, 1, e64,m1,ta,mu
; LMULMAX1-RV32-NEXT: vsrl.vx v27, v26, a7
; LMULMAX1-RV32-NEXT: vmv.x.s a2, v27
; LMULMAX1-RV32-NEXT: addi a3, a3, 257
; LMULMAX1-RV32-NEXT: bnez a2, .LBB7_2
; LMULMAX1-RV32-NEXT: lui a2, 4112
; LMULMAX1-RV32-NEXT: addi a3, a2, 257
; LMULMAX1-RV32-NEXT: vmv.x.s a2, v26
; LMULMAX1-RV32-NEXT: bnez a1, .LBB7_2
; LMULMAX1-RV32-NEXT: # %bb.1:
; LMULMAX1-RV32-NEXT: srli a2, a1, 1
; LMULMAX1-RV32-NEXT: or a1, a1, a2
; LMULMAX1-RV32-NEXT: srli a1, a2, 1
; LMULMAX1-RV32-NEXT: or a1, a2, a1
; LMULMAX1-RV32-NEXT: srli a2, a1, 2
; LMULMAX1-RV32-NEXT: or a1, a1, a2
; LMULMAX1-RV32-NEXT: srli a2, a1, 4
@ -11575,8 +11575,8 @@ define void @ctlz_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
; LMULMAX1-RV32-NEXT: addi a1, a1, 32
; LMULMAX1-RV32-NEXT: j .LBB7_3
; LMULMAX1-RV32-NEXT: .LBB7_2:
; LMULMAX1-RV32-NEXT: srli a1, a2, 1
; LMULMAX1-RV32-NEXT: or a1, a2, a1
; LMULMAX1-RV32-NEXT: srli a2, a1, 1
; LMULMAX1-RV32-NEXT: or a1, a1, a2
; LMULMAX1-RV32-NEXT: srli a2, a1, 2
; LMULMAX1-RV32-NEXT: or a1, a1, a2
; LMULMAX1-RV32-NEXT: srli a2, a1, 4
@ -11660,15 +11660,14 @@ define void @ctlz_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
; LMULMAX1-RV32-NEXT: sw a1, 24(sp)
; LMULMAX1-RV32-NEXT: sw zero, 12(sp)
; LMULMAX1-RV32-NEXT: sw zero, 4(sp)
; LMULMAX1-RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu
; LMULMAX1-RV32-NEXT: vmv.x.s a1, v25
; LMULMAX1-RV32-NEXT: vsetivli a2, 1, e64,m1,ta,mu
; LMULMAX1-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu
; LMULMAX1-RV32-NEXT: vsrl.vx v26, v25, a7
; LMULMAX1-RV32-NEXT: vmv.x.s a2, v26
; LMULMAX1-RV32-NEXT: bnez a2, .LBB7_8
; LMULMAX1-RV32-NEXT: vmv.x.s a1, v26
; LMULMAX1-RV32-NEXT: vmv.x.s a2, v25
; LMULMAX1-RV32-NEXT: bnez a1, .LBB7_8
; LMULMAX1-RV32-NEXT: # %bb.7:
; LMULMAX1-RV32-NEXT: srli a2, a1, 1
; LMULMAX1-RV32-NEXT: or a1, a1, a2
; LMULMAX1-RV32-NEXT: srli a1, a2, 1
; LMULMAX1-RV32-NEXT: or a1, a2, a1
; LMULMAX1-RV32-NEXT: srli a2, a1, 2
; LMULMAX1-RV32-NEXT: or a1, a1, a2
; LMULMAX1-RV32-NEXT: srli a2, a1, 4
@ -11693,8 +11692,8 @@ define void @ctlz_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
; LMULMAX1-RV32-NEXT: addi a1, a1, 32
; LMULMAX1-RV32-NEXT: j .LBB7_9
; LMULMAX1-RV32-NEXT: .LBB7_8:
; LMULMAX1-RV32-NEXT: srli a1, a2, 1
; LMULMAX1-RV32-NEXT: or a1, a2, a1
; LMULMAX1-RV32-NEXT: srli a2, a1, 1
; LMULMAX1-RV32-NEXT: or a1, a1, a2
; LMULMAX1-RV32-NEXT: srli a2, a1, 2
; LMULMAX1-RV32-NEXT: or a1, a1, a2
; LMULMAX1-RV32-NEXT: srli a2, a1, 4

@ -2195,10 +2195,10 @@ define void @cttz_v4i32(<4 x i32>* %x, <4 x i32>* %y) {
; LMULMAX2-RV64-NEXT: .cfi_def_cfa_offset 16
; LMULMAX2-RV64-NEXT: vsetivli a1, 4, e32,m1,ta,mu
; LMULMAX2-RV64-NEXT: vle32.v v25, (a0)
; LMULMAX2-RV64-NEXT: addi a1, zero, 1
; LMULMAX2-RV64-NEXT: vsetivli a2, 1, e32,m1,ta,mu
; LMULMAX2-RV64-NEXT: vsetivli a1, 1, e32,m1,ta,mu
; LMULMAX2-RV64-NEXT: vslidedown.vi v26, v25, 3
; LMULMAX2-RV64-NEXT: vmv.x.s a2, v26
; LMULMAX2-RV64-NEXT: addi a1, zero, 1
; LMULMAX2-RV64-NEXT: slli a6, a1, 32
; LMULMAX2-RV64-NEXT: or a2, a2, a6
; LMULMAX2-RV64-NEXT: addi a3, a2, -1
@ -2407,10 +2407,10 @@ define void @cttz_v4i32(<4 x i32>* %x, <4 x i32>* %y) {
; LMULMAX1-RV64-NEXT: .cfi_def_cfa_offset 16
; LMULMAX1-RV64-NEXT: vsetivli a1, 4, e32,m1,ta,mu
; LMULMAX1-RV64-NEXT: vle32.v v25, (a0)
; LMULMAX1-RV64-NEXT: addi a1, zero, 1
; LMULMAX1-RV64-NEXT: vsetivli a2, 1, e32,m1,ta,mu
; LMULMAX1-RV64-NEXT: vsetivli a1, 1, e32,m1,ta,mu
; LMULMAX1-RV64-NEXT: vslidedown.vi v26, v25, 3
; LMULMAX1-RV64-NEXT: vmv.x.s a2, v26
; LMULMAX1-RV64-NEXT: addi a1, zero, 1
; LMULMAX1-RV64-NEXT: slli a6, a1, 32
; LMULMAX1-RV64-NEXT: or a2, a2, a6
; LMULMAX1-RV64-NEXT: addi a3, a2, -1
@ -2537,24 +2537,24 @@ define void @cttz_v2i64(<2 x i64>* %x, <2 x i64>* %y) {
; LMULMAX2-RV32-NEXT: vle64.v v25, (a0)
; LMULMAX2-RV32-NEXT: sw zero, 12(sp)
; LMULMAX2-RV32-NEXT: sw zero, 4(sp)
; LMULMAX2-RV32-NEXT: vmv.x.s a5, v25
; LMULMAX2-RV32-NEXT: addi a6, zero, 32
; LMULMAX2-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu
; LMULMAX2-RV32-NEXT: vsrl.vx v26, v25, a6
; LMULMAX2-RV32-NEXT: vmv.x.s a5, v26
; LMULMAX2-RV32-NEXT: lui a1, 349525
; LMULMAX2-RV32-NEXT: addi a4, a1, 1365
; LMULMAX2-RV32-NEXT: lui a1, 209715
; LMULMAX2-RV32-NEXT: addi a3, a1, 819
; LMULMAX2-RV32-NEXT: lui a1, 61681
; LMULMAX2-RV32-NEXT: addi a7, a1, -241
; LMULMAX2-RV32-NEXT: lui a1, 4112
; LMULMAX2-RV32-NEXT: addi a2, a1, 257
; LMULMAX2-RV32-NEXT: addi a6, zero, 32
; LMULMAX2-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu
; LMULMAX2-RV32-NEXT: vsrl.vx v26, v25, a6
; LMULMAX2-RV32-NEXT: vmv.x.s a1, v26
; LMULMAX2-RV32-NEXT: bnez a5, .LBB3_2
; LMULMAX2-RV32-NEXT: lui a2, 4112
; LMULMAX2-RV32-NEXT: vmv.x.s a1, v25
; LMULMAX2-RV32-NEXT: addi a2, a2, 257
; LMULMAX2-RV32-NEXT: bnez a1, .LBB3_2
; LMULMAX2-RV32-NEXT: # %bb.1:
; LMULMAX2-RV32-NEXT: addi a5, a1, -1
; LMULMAX2-RV32-NEXT: not a1, a1
; LMULMAX2-RV32-NEXT: and a1, a1, a5
; LMULMAX2-RV32-NEXT: addi a1, a5, -1
; LMULMAX2-RV32-NEXT: not a5, a5
; LMULMAX2-RV32-NEXT: and a1, a5, a1
; LMULMAX2-RV32-NEXT: srli a5, a1, 1
; LMULMAX2-RV32-NEXT: and a5, a5, a4
; LMULMAX2-RV32-NEXT: sub a1, a1, a5
@ -2570,9 +2570,9 @@ define void @cttz_v2i64(<2 x i64>* %x, <2 x i64>* %y) {
; LMULMAX2-RV32-NEXT: addi a5, a1, 32
; LMULMAX2-RV32-NEXT: j .LBB3_3
; LMULMAX2-RV32-NEXT: .LBB3_2:
; LMULMAX2-RV32-NEXT: addi a1, a5, -1
; LMULMAX2-RV32-NEXT: not a5, a5
; LMULMAX2-RV32-NEXT: and a1, a5, a1
; LMULMAX2-RV32-NEXT: addi a5, a1, -1
; LMULMAX2-RV32-NEXT: not a1, a1
; LMULMAX2-RV32-NEXT: and a1, a1, a5
; LMULMAX2-RV32-NEXT: srli a5, a1, 1
; LMULMAX2-RV32-NEXT: and a5, a5, a4
; LMULMAX2-RV32-NEXT: sub a1, a1, a5
@ -2718,24 +2718,24 @@ define void @cttz_v2i64(<2 x i64>* %x, <2 x i64>* %y) {
; LMULMAX1-RV32-NEXT: vle64.v v25, (a0)
; LMULMAX1-RV32-NEXT: sw zero, 12(sp)
; LMULMAX1-RV32-NEXT: sw zero, 4(sp)
; LMULMAX1-RV32-NEXT: vmv.x.s a5, v25
; LMULMAX1-RV32-NEXT: addi a6, zero, 32
; LMULMAX1-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu
; LMULMAX1-RV32-NEXT: vsrl.vx v26, v25, a6
; LMULMAX1-RV32-NEXT: vmv.x.s a5, v26
; LMULMAX1-RV32-NEXT: lui a1, 349525
; LMULMAX1-RV32-NEXT: addi a4, a1, 1365
; LMULMAX1-RV32-NEXT: lui a1, 209715
; LMULMAX1-RV32-NEXT: addi a3, a1, 819
; LMULMAX1-RV32-NEXT: lui a1, 61681
; LMULMAX1-RV32-NEXT: addi a7, a1, -241
; LMULMAX1-RV32-NEXT: lui a1, 4112
; LMULMAX1-RV32-NEXT: addi a2, a1, 257
; LMULMAX1-RV32-NEXT: addi a6, zero, 32
; LMULMAX1-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu
; LMULMAX1-RV32-NEXT: vsrl.vx v26, v25, a6
; LMULMAX1-RV32-NEXT: vmv.x.s a1, v26
; LMULMAX1-RV32-NEXT: bnez a5, .LBB3_2
; LMULMAX1-RV32-NEXT: lui a2, 4112
; LMULMAX1-RV32-NEXT: vmv.x.s a1, v25
; LMULMAX1-RV32-NEXT: addi a2, a2, 257
; LMULMAX1-RV32-NEXT: bnez a1, .LBB3_2
; LMULMAX1-RV32-NEXT: # %bb.1:
; LMULMAX1-RV32-NEXT: addi a5, a1, -1
; LMULMAX1-RV32-NEXT: not a1, a1
; LMULMAX1-RV32-NEXT: and a1, a1, a5
; LMULMAX1-RV32-NEXT: addi a1, a5, -1
; LMULMAX1-RV32-NEXT: not a5, a5
; LMULMAX1-RV32-NEXT: and a1, a5, a1
; LMULMAX1-RV32-NEXT: srli a5, a1, 1
; LMULMAX1-RV32-NEXT: and a5, a5, a4
; LMULMAX1-RV32-NEXT: sub a1, a1, a5
@ -2751,9 +2751,9 @@ define void @cttz_v2i64(<2 x i64>* %x, <2 x i64>* %y) {
; LMULMAX1-RV32-NEXT: addi a5, a1, 32
; LMULMAX1-RV32-NEXT: j .LBB3_3
; LMULMAX1-RV32-NEXT: .LBB3_2:
; LMULMAX1-RV32-NEXT: addi a1, a5, -1
; LMULMAX1-RV32-NEXT: not a5, a5
; LMULMAX1-RV32-NEXT: and a1, a5, a1
; LMULMAX1-RV32-NEXT: addi a5, a1, -1
; LMULMAX1-RV32-NEXT: not a1, a1
; LMULMAX1-RV32-NEXT: and a1, a1, a5
; LMULMAX1-RV32-NEXT: srli a5, a1, 1
; LMULMAX1-RV32-NEXT: and a5, a5, a4
; LMULMAX1-RV32-NEXT: sub a1, a1, a5
@ -7060,10 +7060,10 @@ define void @cttz_v8i32(<8 x i32>* %x, <8 x i32>* %y) {
; LMULMAX2-RV64-NEXT: andi sp, sp, -32
; LMULMAX2-RV64-NEXT: vsetivli a1, 8, e32,m2,ta,mu
; LMULMAX2-RV64-NEXT: vle32.v v26, (a0)
; LMULMAX2-RV64-NEXT: addi a1, zero, 1
; LMULMAX2-RV64-NEXT: vsetivli a2, 1, e32,m2,ta,mu
; LMULMAX2-RV64-NEXT: vsetivli a1, 1, e32,m2,ta,mu
; LMULMAX2-RV64-NEXT: vslidedown.vi v28, v26, 7
; LMULMAX2-RV64-NEXT: vmv.x.s a2, v28
; LMULMAX2-RV64-NEXT: addi a1, zero, 1
; LMULMAX2-RV64-NEXT: slli a6, a1, 32
; LMULMAX2-RV64-NEXT: or a2, a2, a6
; LMULMAX2-RV64-NEXT: addi a3, a2, -1
@ -7646,24 +7646,24 @@ define void @cttz_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
; LMULMAX2-RV32-NEXT: sw zero, 20(sp)
; LMULMAX2-RV32-NEXT: sw zero, 12(sp)
; LMULMAX2-RV32-NEXT: sw zero, 4(sp)
; LMULMAX2-RV32-NEXT: vmv.x.s a5, v26
; LMULMAX2-RV32-NEXT: addi a6, zero, 32
; LMULMAX2-RV32-NEXT: vsetivli a1, 1, e64,m2,ta,mu
; LMULMAX2-RV32-NEXT: vsrl.vx v28, v26, a6
; LMULMAX2-RV32-NEXT: vmv.x.s a5, v28
; LMULMAX2-RV32-NEXT: lui a1, 349525
; LMULMAX2-RV32-NEXT: addi a4, a1, 1365
; LMULMAX2-RV32-NEXT: lui a1, 209715
; LMULMAX2-RV32-NEXT: addi a3, a1, 819
; LMULMAX2-RV32-NEXT: lui a1, 61681
; LMULMAX2-RV32-NEXT: addi a7, a1, -241
; LMULMAX2-RV32-NEXT: lui a1, 4112
; LMULMAX2-RV32-NEXT: addi a2, a1, 257
; LMULMAX2-RV32-NEXT: addi a6, zero, 32
; LMULMAX2-RV32-NEXT: vsetivli a1, 1, e64,m2,ta,mu
; LMULMAX2-RV32-NEXT: vsrl.vx v28, v26, a6
; LMULMAX2-RV32-NEXT: vmv.x.s a1, v28
; LMULMAX2-RV32-NEXT: bnez a5, .LBB7_2
; LMULMAX2-RV32-NEXT: lui a2, 4112
; LMULMAX2-RV32-NEXT: vmv.x.s a1, v26
; LMULMAX2-RV32-NEXT: addi a2, a2, 257
; LMULMAX2-RV32-NEXT: bnez a1, .LBB7_2
; LMULMAX2-RV32-NEXT: # %bb.1:
; LMULMAX2-RV32-NEXT: addi a5, a1, -1
; LMULMAX2-RV32-NEXT: not a1, a1
; LMULMAX2-RV32-NEXT: and a1, a1, a5
; LMULMAX2-RV32-NEXT: addi a1, a5, -1
; LMULMAX2-RV32-NEXT: not a5, a5
; LMULMAX2-RV32-NEXT: and a1, a5, a1
; LMULMAX2-RV32-NEXT: srli a5, a1, 1
; LMULMAX2-RV32-NEXT: and a5, a5, a4
; LMULMAX2-RV32-NEXT: sub a1, a1, a5
@ -7679,9 +7679,9 @@ define void @cttz_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
; LMULMAX2-RV32-NEXT: addi a5, a1, 32
; LMULMAX2-RV32-NEXT: j .LBB7_3
; LMULMAX2-RV32-NEXT: .LBB7_2:
; LMULMAX2-RV32-NEXT: addi a1, a5, -1
; LMULMAX2-RV32-NEXT: not a5, a5
; LMULMAX2-RV32-NEXT: and a1, a5, a1
; LMULMAX2-RV32-NEXT: addi a5, a1, -1
; LMULMAX2-RV32-NEXT: not a1, a1
; LMULMAX2-RV32-NEXT: and a1, a1, a5
; LMULMAX2-RV32-NEXT: srli a5, a1, 1
; LMULMAX2-RV32-NEXT: and a5, a5, a4
; LMULMAX2-RV32-NEXT: sub a1, a1, a5
@ -7961,29 +7961,29 @@ define void @cttz_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
; LMULMAX1-RV32-NEXT: addi sp, sp, -32
; LMULMAX1-RV32-NEXT: .cfi_def_cfa_offset 32
; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
; LMULMAX1-RV32-NEXT: vle64.v v25, (a0)
; LMULMAX1-RV32-NEXT: addi a6, a0, 16
; LMULMAX1-RV32-NEXT: vle64.v v26, (a6)
; LMULMAX1-RV32-NEXT: vle64.v v25, (a0)
; LMULMAX1-RV32-NEXT: sw zero, 28(sp)
; LMULMAX1-RV32-NEXT: sw zero, 20(sp)
; LMULMAX1-RV32-NEXT: vmv.x.s a1, v26
; LMULMAX1-RV32-NEXT: addi a7, zero, 32
; LMULMAX1-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu
; LMULMAX1-RV32-NEXT: vsrl.vx v27, v26, a7
; LMULMAX1-RV32-NEXT: vmv.x.s a1, v27
; LMULMAX1-RV32-NEXT: lui a2, 349525
; LMULMAX1-RV32-NEXT: addi a5, a2, 1365
; LMULMAX1-RV32-NEXT: lui a2, 209715
; LMULMAX1-RV32-NEXT: addi a4, a2, 819
; LMULMAX1-RV32-NEXT: lui a2, 61681
; LMULMAX1-RV32-NEXT: addi t0, a2, -241
; LMULMAX1-RV32-NEXT: lui a2, 4112
; LMULMAX1-RV32-NEXT: addi a3, a2, 257
; LMULMAX1-RV32-NEXT: addi a7, zero, 32
; LMULMAX1-RV32-NEXT: vsetivli a2, 1, e64,m1,ta,mu
; LMULMAX1-RV32-NEXT: vsrl.vx v27, v26, a7
; LMULMAX1-RV32-NEXT: vmv.x.s a2, v27
; LMULMAX1-RV32-NEXT: bnez a1, .LBB7_2
; LMULMAX1-RV32-NEXT: lui a3, 4112
; LMULMAX1-RV32-NEXT: vmv.x.s a2, v26
; LMULMAX1-RV32-NEXT: addi a3, a3, 257
; LMULMAX1-RV32-NEXT: bnez a2, .LBB7_2
; LMULMAX1-RV32-NEXT: # %bb.1:
; LMULMAX1-RV32-NEXT: addi a1, a2, -1
; LMULMAX1-RV32-NEXT: not a2, a2
; LMULMAX1-RV32-NEXT: and a1, a2, a1
; LMULMAX1-RV32-NEXT: addi a2, a1, -1
; LMULMAX1-RV32-NEXT: not a1, a1
; LMULMAX1-RV32-NEXT: and a1, a1, a2
; LMULMAX1-RV32-NEXT: srli a2, a1, 1
; LMULMAX1-RV32-NEXT: and a2, a2, a5
; LMULMAX1-RV32-NEXT: sub a1, a1, a2
@ -7999,9 +7999,9 @@ define void @cttz_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
; LMULMAX1-RV32-NEXT: addi a1, a1, 32
; LMULMAX1-RV32-NEXT: j .LBB7_3
; LMULMAX1-RV32-NEXT: .LBB7_2:
; LMULMAX1-RV32-NEXT: addi a2, a1, -1
; LMULMAX1-RV32-NEXT: not a1, a1
; LMULMAX1-RV32-NEXT: and a1, a1, a2
; LMULMAX1-RV32-NEXT: addi a1, a2, -1
; LMULMAX1-RV32-NEXT: not a2, a2
; LMULMAX1-RV32-NEXT: and a1, a2, a1
; LMULMAX1-RV32-NEXT: srli a2, a1, 1
; LMULMAX1-RV32-NEXT: and a2, a2, a5
; LMULMAX1-RV32-NEXT: sub a1, a1, a2
@ -8060,10 +8060,9 @@ define void @cttz_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
; LMULMAX1-RV32-NEXT: sw a1, 24(sp)
; LMULMAX1-RV32-NEXT: sw zero, 12(sp)
; LMULMAX1-RV32-NEXT: sw zero, 4(sp)
; LMULMAX1-RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu
; LMULMAX1-RV32-NEXT: vmv.x.s a1, v25
; LMULMAX1-RV32-NEXT: vsetivli a2, 1, e64,m1,ta,mu
; LMULMAX1-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu
; LMULMAX1-RV32-NEXT: vsrl.vx v26, v25, a7
; LMULMAX1-RV32-NEXT: vmv.x.s a1, v25
; LMULMAX1-RV32-NEXT: vmv.x.s a2, v26
; LMULMAX1-RV32-NEXT: bnez a1, .LBB7_8
; LMULMAX1-RV32-NEXT: # %bb.7:

@ -153,12 +153,11 @@ define <4 x i64> @sextload_v4i8_v4i64(<4 x i8>* %x) {
; LMULMAX1: # %bb.0:
; LMULMAX1-NEXT: vsetivli a1, 4, e8,m1,ta,mu
; LMULMAX1-NEXT: vle8.v v25, (a0)
; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
; LMULMAX1-NEXT: vsext.vf8 v8, v25
; LMULMAX1-NEXT: vsetivli a0, 2, e8,m1,ta,mu
; LMULMAX1-NEXT: vslidedown.vi v25, v25, 2
; LMULMAX1-NEXT: vslidedown.vi v26, v25, 2
; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
; LMULMAX1-NEXT: vsext.vf8 v9, v25
; LMULMAX1-NEXT: vsext.vf8 v9, v26
; LMULMAX1-NEXT: vsext.vf8 v8, v25
; LMULMAX1-NEXT: ret
;
; LMULMAX4-LABEL: sextload_v4i8_v4i64:
@ -178,12 +177,11 @@ define <4 x i64> @zextload_v4i8_v4i64(<4 x i8>* %x) {
; LMULMAX1: # %bb.0:
; LMULMAX1-NEXT: vsetivli a1, 4, e8,m1,ta,mu
; LMULMAX1-NEXT: vle8.v v25, (a0)
; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
; LMULMAX1-NEXT: vzext.vf8 v8, v25
; LMULMAX1-NEXT: vsetivli a0, 2, e8,m1,ta,mu
; LMULMAX1-NEXT: vslidedown.vi v25, v25, 2
; LMULMAX1-NEXT: vslidedown.vi v26, v25, 2
; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
; LMULMAX1-NEXT: vzext.vf8 v9, v25
; LMULMAX1-NEXT: vzext.vf8 v9, v26
; LMULMAX1-NEXT: vzext.vf8 v8, v25
; LMULMAX1-NEXT: ret
;
; LMULMAX4-LABEL: zextload_v4i8_v4i64:
@ -229,12 +227,11 @@ define <8 x i32> @sextload_v8i8_v8i32(<8 x i8>* %x) {
; LMULMAX1: # %bb.0:
; LMULMAX1-NEXT: vsetivli a1, 8, e8,m1,ta,mu
; LMULMAX1-NEXT: vle8.v v25, (a0)
; LMULMAX1-NEXT: vsetivli a0, 4, e32,m1,ta,mu
; LMULMAX1-NEXT: vsext.vf4 v8, v25
; LMULMAX1-NEXT: vsetivli a0, 4, e8,m1,ta,mu
; LMULMAX1-NEXT: vslidedown.vi v25, v25, 4
; LMULMAX1-NEXT: vslidedown.vi v26, v25, 4
; LMULMAX1-NEXT: vsetivli a0, 4, e32,m1,ta,mu
; LMULMAX1-NEXT: vsext.vf4 v9, v25
; LMULMAX1-NEXT: vsext.vf4 v9, v26
; LMULMAX1-NEXT: vsext.vf4 v8, v25
; LMULMAX1-NEXT: ret
;
; LMULMAX4-LABEL: sextload_v8i8_v8i32:
@ -254,12 +251,11 @@ define <8 x i32> @zextload_v8i8_v8i32(<8 x i8>* %x) {
; LMULMAX1: # %bb.0:
; LMULMAX1-NEXT: vsetivli a1, 8, e8,m1,ta,mu
; LMULMAX1-NEXT: vle8.v v25, (a0)
; LMULMAX1-NEXT: vsetivli a0, 4, e32,m1,ta,mu
; LMULMAX1-NEXT: vzext.vf4 v8, v25
; LMULMAX1-NEXT: vsetivli a0, 4, e8,m1,ta,mu
; LMULMAX1-NEXT: vslidedown.vi v25, v25, 4
; LMULMAX1-NEXT: vslidedown.vi v26, v25, 4
; LMULMAX1-NEXT: vsetivli a0, 4, e32,m1,ta,mu
; LMULMAX1-NEXT: vzext.vf4 v9, v25
; LMULMAX1-NEXT: vzext.vf4 v9, v26
; LMULMAX1-NEXT: vzext.vf4 v8, v25
; LMULMAX1-NEXT: ret
;
; LMULMAX4-LABEL: zextload_v8i8_v8i32:
@ -279,8 +275,6 @@ define <8 x i64> @sextload_v8i8_v8i64(<8 x i8>* %x) {
; LMULMAX1: # %bb.0:
; LMULMAX1-NEXT: vsetivli a1, 8, e8,m1,ta,mu
; LMULMAX1-NEXT: vle8.v v25, (a0)
; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
; LMULMAX1-NEXT: vsext.vf8 v8, v25
; LMULMAX1-NEXT: vsetivli a0, 4, e8,m1,ta,mu
; LMULMAX1-NEXT: vslidedown.vi v26, v25, 4
; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
@ -290,9 +284,10 @@ define <8 x i64> @sextload_v8i8_v8i64(<8 x i8>* %x) {
; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
; LMULMAX1-NEXT: vsext.vf8 v11, v26
; LMULMAX1-NEXT: vsetivli a0, 2, e8,m1,ta,mu
; LMULMAX1-NEXT: vslidedown.vi v25, v25, 2
; LMULMAX1-NEXT: vslidedown.vi v26, v25, 2
; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
; LMULMAX1-NEXT: vsext.vf8 v9, v25
; LMULMAX1-NEXT: vsext.vf8 v9, v26
; LMULMAX1-NEXT: vsext.vf8 v8, v25
; LMULMAX1-NEXT: ret
;
; LMULMAX4-LABEL: sextload_v8i8_v8i64:
@ -312,8 +307,6 @@ define <8 x i64> @zextload_v8i8_v8i64(<8 x i8>* %x) {
; LMULMAX1: # %bb.0:
; LMULMAX1-NEXT: vsetivli a1, 8, e8,m1,ta,mu
; LMULMAX1-NEXT: vle8.v v25, (a0)
; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
; LMULMAX1-NEXT: vzext.vf8 v8, v25
; LMULMAX1-NEXT: vsetivli a0, 4, e8,m1,ta,mu
; LMULMAX1-NEXT: vslidedown.vi v26, v25, 4
; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
@ -323,9 +316,10 @@ define <8 x i64> @zextload_v8i8_v8i64(<8 x i8>* %x) {
; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
; LMULMAX1-NEXT: vzext.vf8 v11, v26
; LMULMAX1-NEXT: vsetivli a0, 2, e8,m1,ta,mu
; LMULMAX1-NEXT: vslidedown.vi v25, v25, 2
; LMULMAX1-NEXT: vslidedown.vi v26, v25, 2
; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
; LMULMAX1-NEXT: vzext.vf8 v9, v25
; LMULMAX1-NEXT: vzext.vf8 v9, v26
; LMULMAX1-NEXT: vzext.vf8 v8, v25
; LMULMAX1-NEXT: ret
;
; LMULMAX4-LABEL: zextload_v8i8_v8i64:
@ -345,12 +339,11 @@ define <16 x i16> @sextload_v16i8_v16i16(<16 x i8>* %x) {
; LMULMAX1: # %bb.0:
; LMULMAX1-NEXT: vsetivli a1, 16, e8,m1,ta,mu
; LMULMAX1-NEXT: vle8.v v25, (a0)
; LMULMAX1-NEXT: vsetivli a0, 8, e16,m1,ta,mu
; LMULMAX1-NEXT: vsext.vf2 v8, v25
; LMULMAX1-NEXT: vsetivli a0, 8, e8,m1,ta,mu
; LMULMAX1-NEXT: vslidedown.vi v25, v25, 8
; LMULMAX1-NEXT: vslidedown.vi v26, v25, 8
; LMULMAX1-NEXT: vsetivli a0, 8, e16,m1,ta,mu
; LMULMAX1-NEXT: vsext.vf2 v9, v25
; LMULMAX1-NEXT: vsext.vf2 v9, v26
; LMULMAX1-NEXT: vsext.vf2 v8, v25
; LMULMAX1-NEXT: ret
;
; LMULMAX4-LABEL: sextload_v16i8_v16i16:
@ -370,12 +363,11 @@ define <16 x i16> @zextload_v16i8_v16i16(<16 x i8>* %x) {
; LMULMAX1: # %bb.0:
; LMULMAX1-NEXT: vsetivli a1, 16, e8,m1,ta,mu
; LMULMAX1-NEXT: vle8.v v25, (a0)
; LMULMAX1-NEXT: vsetivli a0, 8, e16,m1,ta,mu
; LMULMAX1-NEXT: vzext.vf2 v8, v25
; LMULMAX1-NEXT: vsetivli a0, 8, e8,m1,ta,mu
; LMULMAX1-NEXT: vslidedown.vi v25, v25, 8
; LMULMAX1-NEXT: vslidedown.vi v26, v25, 8
; LMULMAX1-NEXT: vsetivli a0, 8, e16,m1,ta,mu
; LMULMAX1-NEXT: vzext.vf2 v9, v25
; LMULMAX1-NEXT: vzext.vf2 v9, v26
; LMULMAX1-NEXT: vzext.vf2 v8, v25
; LMULMAX1-NEXT: ret
;
; LMULMAX4-LABEL: zextload_v16i8_v16i16:
@ -395,8 +387,6 @@ define <16 x i32> @sextload_v16i8_v16i32(<16 x i8>* %x) {
; LMULMAX1: # %bb.0:
; LMULMAX1-NEXT: vsetivli a1, 16, e8,m1,ta,mu
; LMULMAX1-NEXT: vle8.v v25, (a0)
; LMULMAX1-NEXT: vsetivli a0, 4, e32,m1,ta,mu
; LMULMAX1-NEXT: vsext.vf4 v8, v25
; LMULMAX1-NEXT: vsetivli a0, 8, e8,m1,ta,mu
; LMULMAX1-NEXT: vslidedown.vi v26, v25, 8
; LMULMAX1-NEXT: vsetivli a0, 4, e32,m1,ta,mu
@ -406,9 +396,10 @@ define <16 x i32> @sextload_v16i8_v16i32(<16 x i8>* %x) {
; LMULMAX1-NEXT: vsetivli a0, 4, e32,m1,ta,mu
; LMULMAX1-NEXT: vsext.vf4 v11, v26
; LMULMAX1-NEXT: vsetivli a0, 4, e8,m1,ta,mu
; LMULMAX1-NEXT: vslidedown.vi v25, v25, 4
; LMULMAX1-NEXT: vslidedown.vi v26, v25, 4
; LMULMAX1-NEXT: vsetivli a0, 4, e32,m1,ta,mu
; LMULMAX1-NEXT: vsext.vf4 v9, v25
; LMULMAX1-NEXT: vsext.vf4 v9, v26
; LMULMAX1-NEXT: vsext.vf4 v8, v25
; LMULMAX1-NEXT: ret
;
; LMULMAX4-LABEL: sextload_v16i8_v16i32:
@ -428,8 +419,6 @@ define <16 x i32> @zextload_v16i8_v16i32(<16 x i8>* %x) {
; LMULMAX1: # %bb.0:
; LMULMAX1-NEXT: vsetivli a1, 16, e8,m1,ta,mu
; LMULMAX1-NEXT: vle8.v v25, (a0)
; LMULMAX1-NEXT: vsetivli a0, 4, e32,m1,ta,mu
; LMULMAX1-NEXT: vzext.vf4 v8, v25
; LMULMAX1-NEXT: vsetivli a0, 8, e8,m1,ta,mu
; LMULMAX1-NEXT: vslidedown.vi v26, v25, 8
; LMULMAX1-NEXT: vsetivli a0, 4, e32,m1,ta,mu
@ -439,9 +428,10 @@ define <16 x i32> @zextload_v16i8_v16i32(<16 x i8>* %x) {
; LMULMAX1-NEXT: vsetivli a0, 4, e32,m1,ta,mu
; LMULMAX1-NEXT: vzext.vf4 v11, v26
; LMULMAX1-NEXT: vsetivli a0, 4, e8,m1,ta,mu
; LMULMAX1-NEXT: vslidedown.vi v25, v25, 4
; LMULMAX1-NEXT: vslidedown.vi v26, v25, 4
; LMULMAX1-NEXT: vsetivli a0, 4, e32,m1,ta,mu
; LMULMAX1-NEXT: vzext.vf4 v9, v25
; LMULMAX1-NEXT: vzext.vf4 v9, v26
; LMULMAX1-NEXT: vzext.vf4 v8, v25
; LMULMAX1-NEXT: ret
;
; LMULMAX4-LABEL: zextload_v16i8_v16i32:
@ -461,8 +451,6 @@ define <16 x i64> @sextload_v16i8_v16i64(<16 x i8>* %x) {
; LMULMAX1: # %bb.0:
; LMULMAX1-NEXT: vsetivli a1, 16, e8,m1,ta,mu
; LMULMAX1-NEXT: vle8.v v25, (a0)
; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
; LMULMAX1-NEXT: vsext.vf8 v8, v25
; LMULMAX1-NEXT: vsetivli a0, 8, e8,m1,ta,mu
; LMULMAX1-NEXT: vslidedown.vi v26, v25, 8
; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
@ -480,29 +468,29 @@ define <16 x i64> @sextload_v16i8_v16i64(<16 x i8>* %x) {
; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
; LMULMAX1-NEXT: vsext.vf8 v9, v27
; LMULMAX1-NEXT: vsetivli a0, 4, e8,m1,ta,mu
; LMULMAX1-NEXT: vslidedown.vi v25, v25, 4
; LMULMAX1-NEXT: vslidedown.vi v27, v25, 4
; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
; LMULMAX1-NEXT: vsext.vf8 v10, v25
; LMULMAX1-NEXT: vsext.vf8 v10, v27
; LMULMAX1-NEXT: vsetivli a0, 2, e8,m1,ta,mu
; LMULMAX1-NEXT: vslidedown.vi v25, v25, 2
; LMULMAX1-NEXT: vslidedown.vi v27, v27, 2
; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
; LMULMAX1-NEXT: vsext.vf8 v11, v25
; LMULMAX1-NEXT: vsext.vf8 v11, v27
; LMULMAX1-NEXT: vsetivli a0, 2, e8,m1,ta,mu
; LMULMAX1-NEXT: vslidedown.vi v25, v26, 2
; LMULMAX1-NEXT: vslidedown.vi v26, v26, 2
; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
; LMULMAX1-NEXT: vsext.vf8 v13, v25
; LMULMAX1-NEXT: vsext.vf8 v13, v26
; LMULMAX1-NEXT: vsext.vf8 v8, v25
; LMULMAX1-NEXT: ret
;
; LMULMAX4-LABEL: sextload_v16i8_v16i64:
; LMULMAX4: # %bb.0:
; LMULMAX4-NEXT: vsetivli a1, 16, e8,m1,ta,mu
; LMULMAX4-NEXT: vle8.v v25, (a0)
; LMULMAX4-NEXT: vsetivli a0, 8, e64,m4,ta,mu
; LMULMAX4-NEXT: vsext.vf8 v8, v25
; LMULMAX4-NEXT: vsetivli a0, 8, e8,m1,ta,mu
; LMULMAX4-NEXT: vslidedown.vi v25, v25, 8
; LMULMAX4-NEXT: vslidedown.vi v26, v25, 8
; LMULMAX4-NEXT: vsetivli a0, 8, e64,m4,ta,mu
; LMULMAX4-NEXT: vsext.vf8 v12, v25
; LMULMAX4-NEXT: vsext.vf8 v12, v26
; LMULMAX4-NEXT: vsext.vf8 v8, v25
; LMULMAX4-NEXT: ret
%y = load <16 x i8>, <16 x i8>* %x
%z = sext <16 x i8> %y to <16 x i64>
@ -514,8 +502,6 @@ define <16 x i64> @zextload_v16i8_v16i64(<16 x i8>* %x) {
; LMULMAX1: # %bb.0:
; LMULMAX1-NEXT: vsetivli a1, 16, e8,m1,ta,mu
; LMULMAX1-NEXT: vle8.v v25, (a0)
; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
; LMULMAX1-NEXT: vzext.vf8 v8, v25
; LMULMAX1-NEXT: vsetivli a0, 8, e8,m1,ta,mu
; LMULMAX1-NEXT: vslidedown.vi v26, v25, 8
; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
@ -533,29 +519,29 @@ define <16 x i64> @zextload_v16i8_v16i64(<16 x i8>* %x) {
; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
; LMULMAX1-NEXT: vzext.vf8 v9, v27
; LMULMAX1-NEXT: vsetivli a0, 4, e8,m1,ta,mu
; LMULMAX1-NEXT: vslidedown.vi v25, v25, 4
; LMULMAX1-NEXT: vslidedown.vi v27, v25, 4
; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
; LMULMAX1-NEXT: vzext.vf8 v10, v25
; LMULMAX1-NEXT: vzext.vf8 v10, v27
; LMULMAX1-NEXT: vsetivli a0, 2, e8,m1,ta,mu
; LMULMAX1-NEXT: vslidedown.vi v25, v25, 2
; LMULMAX1-NEXT: vslidedown.vi v27, v27, 2
; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
; LMULMAX1-NEXT: vzext.vf8 v11, v25
; LMULMAX1-NEXT: vzext.vf8 v11, v27
; LMULMAX1-NEXT: vsetivli a0, 2, e8,m1,ta,mu
; LMULMAX1-NEXT: vslidedown.vi v25, v26, 2
; LMULMAX1-NEXT: vslidedown.vi v26, v26, 2
; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
; LMULMAX1-NEXT: vzext.vf8 v13, v25
; LMULMAX1-NEXT: vzext.vf8 v13, v26
; LMULMAX1-NEXT: vzext.vf8 v8, v25
; LMULMAX1-NEXT: ret
;
; LMULMAX4-LABEL: zextload_v16i8_v16i64:
; LMULMAX4: # %bb.0:
; LMULMAX4-NEXT: vsetivli a1, 16, e8,m1,ta,mu
; LMULMAX4-NEXT: vle8.v v25, (a0)
; LMULMAX4-NEXT: vsetivli a0, 8, e64,m4,ta,mu
; LMULMAX4-NEXT: vzext.vf8 v8, v25
; LMULMAX4-NEXT: vsetivli a0, 8, e8,m1,ta,mu
; LMULMAX4-NEXT: vslidedown.vi v25, v25, 8
; LMULMAX4-NEXT: vslidedown.vi v26, v25, 8
; LMULMAX4-NEXT: vsetivli a0, 8, e64,m4,ta,mu
; LMULMAX4-NEXT: vzext.vf8 v12, v25
; LMULMAX4-NEXT: vzext.vf8 v12, v26
; LMULMAX4-NEXT: vzext.vf8 v8, v25
; LMULMAX4-NEXT: ret
%y = load <16 x i8>, <16 x i8>* %x
%z = zext <16 x i8> %y to <16 x i64>
@ -692,12 +678,11 @@ define <4 x i64> @sextload_v4i16_v4i64(<4 x i16>* %x) {
; LMULMAX1: # %bb.0:
; LMULMAX1-NEXT: vsetivli a1, 4, e16,m1,ta,mu
; LMULMAX1-NEXT: vle16.v v25, (a0)
; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
; LMULMAX1-NEXT: vsext.vf4 v8, v25
; LMULMAX1-NEXT: vsetivli a0, 2, e16,m1,ta,mu
; LMULMAX1-NEXT: vslidedown.vi v25, v25, 2
; LMULMAX1-NEXT: vslidedown.vi v26, v25, 2
; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
; LMULMAX1-NEXT: vsext.vf4 v9, v25
; LMULMAX1-NEXT: vsext.vf4 v9, v26
; LMULMAX1-NEXT: vsext.vf4 v8, v25
; LMULMAX1-NEXT: ret
;
; LMULMAX4-LABEL: sextload_v4i16_v4i64:
@ -717,12 +702,11 @@ define <4 x i64> @zextload_v4i16_v4i64(<4 x i16>* %x) {
; LMULMAX1: # %bb.0:
; LMULMAX1-NEXT: vsetivli a1, 4, e16,m1,ta,mu
; LMULMAX1-NEXT: vle16.v v25, (a0)
; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
; LMULMAX1-NEXT: vzext.vf4 v8, v25
; LMULMAX1-NEXT: vsetivli a0, 2, e16,m1,ta,mu
; LMULMAX1-NEXT: vslidedown.vi v25, v25, 2
; LMULMAX1-NEXT: vslidedown.vi v26, v25, 2
; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
; LMULMAX1-NEXT: vzext.vf4 v9, v25
; LMULMAX1-NEXT: vzext.vf4 v9, v26
; LMULMAX1-NEXT: vzext.vf4 v8, v25
; LMULMAX1-NEXT: ret
;
; LMULMAX4-LABEL: zextload_v4i16_v4i64:
@ -755,12 +739,11 @@ define <8 x i32> @sextload_v8i16_v8i32(<8 x i16>* %x) {
; LMULMAX1: # %bb.0:
; LMULMAX1-NEXT: vsetivli a1, 8, e16,m1,ta,mu
; LMULMAX1-NEXT: vle16.v v25, (a0)
; LMULMAX1-NEXT: vsetivli a0, 4, e32,m1,ta,mu
; LMULMAX1-NEXT: vsext.vf2 v8, v25
; LMULMAX1-NEXT: vsetivli a0, 4, e16,m1,ta,mu
; LMULMAX1-NEXT: vslidedown.vi v25, v25, 4
; LMULMAX1-NEXT: vslidedown.vi v26, v25, 4
; LMULMAX1-NEXT: vsetivli a0, 4, e32,m1,ta,mu
; LMULMAX1-NEXT: vsext.vf2 v9, v25
; LMULMAX1-NEXT: vsext.vf2 v9, v26
; LMULMAX1-NEXT: vsext.vf2 v8, v25
; LMULMAX1-NEXT: ret
;
; LMULMAX4-LABEL: sextload_v8i16_v8i32:
@ -780,12 +763,11 @@ define <8 x i32> @zextload_v8i16_v8i32(<8 x i16>* %x) {
; LMULMAX1: # %bb.0:
; LMULMAX1-NEXT: vsetivli a1, 8, e16,m1,ta,mu
; LMULMAX1-NEXT: vle16.v v25, (a0)
; LMULMAX1-NEXT: vsetivli a0, 4, e32,m1,ta,mu
; LMULMAX1-NEXT: vzext.vf2 v8, v25
; LMULMAX1-NEXT: vsetivli a0, 4, e16,m1,ta,mu
; LMULMAX1-NEXT: vslidedown.vi v25, v25, 4
; LMULMAX1-NEXT: vslidedown.vi v26, v25, 4
; LMULMAX1-NEXT: vsetivli a0, 4, e32,m1,ta,mu
; LMULMAX1-NEXT: vzext.vf2 v9, v25
; LMULMAX1-NEXT: vzext.vf2 v9, v26
; LMULMAX1-NEXT: vzext.vf2 v8, v25
; LMULMAX1-NEXT: ret
;
; LMULMAX4-LABEL: zextload_v8i16_v8i32:
@ -805,8 +787,6 @@ define <8 x i64> @sextload_v8i16_v8i64(<8 x i16>* %x) {
; LMULMAX1: # %bb.0:
; LMULMAX1-NEXT: vsetivli a1, 8, e16,m1,ta,mu
; LMULMAX1-NEXT: vle16.v v25, (a0)
; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
; LMULMAX1-NEXT: vsext.vf4 v8, v25
; LMULMAX1-NEXT: vsetivli a0, 4, e16,m1,ta,mu
; LMULMAX1-NEXT: vslidedown.vi v26, v25, 4
; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
@ -816,9 +796,10 @@ define <8 x i64> @sextload_v8i16_v8i64(<8 x i16>* %x) {
; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
; LMULMAX1-NEXT: vsext.vf4 v11, v26
; LMULMAX1-NEXT: vsetivli a0, 2, e16,m1,ta,mu
; LMULMAX1-NEXT: vslidedown.vi v25, v25, 2
; LMULMAX1-NEXT: vslidedown.vi v26, v25, 2
; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
; LMULMAX1-NEXT: vsext.vf4 v9, v25
; LMULMAX1-NEXT: vsext.vf4 v9, v26
; LMULMAX1-NEXT: vsext.vf4 v8, v25
; LMULMAX1-NEXT: ret
;
; LMULMAX4-LABEL: sextload_v8i16_v8i64:
@ -838,8 +819,6 @@ define <8 x i64> @zextload_v8i16_v8i64(<8 x i16>* %x) {
; LMULMAX1: # %bb.0:
; LMULMAX1-NEXT: vsetivli a1, 8, e16,m1,ta,mu
; LMULMAX1-NEXT: vle16.v v25, (a0)
; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
; LMULMAX1-NEXT: vzext.vf4 v8, v25
; LMULMAX1-NEXT: vsetivli a0, 4, e16,m1,ta,mu
; LMULMAX1-NEXT: vslidedown.vi v26, v25, 4
; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
@ -849,9 +828,10 @@ define <8 x i64> @zextload_v8i16_v8i64(<8 x i16>* %x) {
; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
; LMULMAX1-NEXT: vzext.vf4 v11, v26
; LMULMAX1-NEXT: vsetivli a0, 2, e16,m1,ta,mu
; LMULMAX1-NEXT: vslidedown.vi v25, v25, 2
; LMULMAX1-NEXT: vslidedown.vi v26, v25, 2
; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
; LMULMAX1-NEXT: vzext.vf4 v9, v25
; LMULMAX1-NEXT: vzext.vf4 v9, v26
; LMULMAX1-NEXT: vzext.vf4 v8, v25
; LMULMAX1-NEXT: ret
;
; LMULMAX4-LABEL: zextload_v8i16_v8i64:
@ -901,17 +881,16 @@ define <16 x i32> @sextload_v16i16_v16i32(<16 x i16>* %x) {
; LMULMAX1-NEXT: vle16.v v25, (a0)
; LMULMAX1-NEXT: addi a0, a0, 16
; LMULMAX1-NEXT: vle16.v v26, (a0)
; LMULMAX1-NEXT: vsetivli a0, 4, e16,m1,ta,mu
; LMULMAX1-NEXT: vslidedown.vi v27, v25, 4
; LMULMAX1-NEXT: vsetivli a0, 4, e32,m1,ta,mu
; LMULMAX1-NEXT: vsext.vf2 v9, v27
; LMULMAX1-NEXT: vsetivli a0, 4, e16,m1,ta,mu
; LMULMAX1-NEXT: vslidedown.vi v27, v26, 4
; LMULMAX1-NEXT: vsetivli a0, 4, e32,m1,ta,mu
; LMULMAX1-NEXT: vsext.vf2 v11, v27
; LMULMAX1-NEXT: vsext.vf2 v8, v25
; LMULMAX1-NEXT: vsetivli a0, 4, e16,m1,ta,mu
; LMULMAX1-NEXT: vslidedown.vi v25, v25, 4
; LMULMAX1-NEXT: vsetivli a0, 4, e32,m1,ta,mu
; LMULMAX1-NEXT: vsext.vf2 v9, v25
; LMULMAX1-NEXT: vsext.vf2 v10, v26
; LMULMAX1-NEXT: vsetivli a0, 4, e16,m1,ta,mu
; LMULMAX1-NEXT: vslidedown.vi v25, v26, 4
; LMULMAX1-NEXT: vsetivli a0, 4, e32,m1,ta,mu
; LMULMAX1-NEXT: vsext.vf2 v11, v25
; LMULMAX1-NEXT: ret
;
; LMULMAX4-LABEL: sextload_v16i16_v16i32:
@ -933,17 +912,16 @@ define <16 x i32> @zextload_v16i16_v16i32(<16 x i16>* %x) {
; LMULMAX1-NEXT: vle16.v v25, (a0)
; LMULMAX1-NEXT: addi a0, a0, 16
; LMULMAX1-NEXT: vle16.v v26, (a0)
; LMULMAX1-NEXT: vsetivli a0, 4, e16,m1,ta,mu
; LMULMAX1-NEXT: vslidedown.vi v27, v25, 4
; LMULMAX1-NEXT: vsetivli a0, 4, e32,m1,ta,mu
; LMULMAX1-NEXT: vzext.vf2 v9, v27
; LMULMAX1-NEXT: vsetivli a0, 4, e16,m1,ta,mu
; LMULMAX1-NEXT: vslidedown.vi v27, v26, 4
; LMULMAX1-NEXT: vsetivli a0, 4, e32,m1,ta,mu
; LMULMAX1-NEXT: vzext.vf2 v11, v27
; LMULMAX1-NEXT: vzext.vf2 v8, v25
; LMULMAX1-NEXT: vsetivli a0, 4, e16,m1,ta,mu
; LMULMAX1-NEXT: vslidedown.vi v25, v25, 4
; LMULMAX1-NEXT: vsetivli a0, 4, e32,m1,ta,mu
; LMULMAX1-NEXT: vzext.vf2 v9, v25
; LMULMAX1-NEXT: vzext.vf2 v10, v26
; LMULMAX1-NEXT: vsetivli a0, 4, e16,m1,ta,mu
; LMULMAX1-NEXT: vslidedown.vi v25, v26, 4
; LMULMAX1-NEXT: vsetivli a0, 4, e32,m1,ta,mu
; LMULMAX1-NEXT: vzext.vf2 v11, v25
; LMULMAX1-NEXT: ret
;
; LMULMAX4-LABEL: zextload_v16i16_v16i32:
@ -965,8 +943,6 @@ define <16 x i64> @sextload_v16i16_v16i64(<16 x i16>* %x) {
; LMULMAX1-NEXT: vle16.v v25, (a0)
; LMULMAX1-NEXT: addi a0, a0, 16
; LMULMAX1-NEXT: vle16.v v26, (a0)
; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
; LMULMAX1-NEXT: vsext.vf4 v8, v25
; LMULMAX1-NEXT: vsetivli a0, 4, e16,m1,ta,mu
; LMULMAX1-NEXT: vslidedown.vi v27, v25, 4
; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
@ -975,7 +951,6 @@ define <16 x i64> @sextload_v16i16_v16i64(<16 x i16>* %x) {
; LMULMAX1-NEXT: vslidedown.vi v27, v27, 2
; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
; LMULMAX1-NEXT: vsext.vf4 v11, v27
; LMULMAX1-NEXT: vsext.vf4 v12, v26
; LMULMAX1-NEXT: vsetivli a0, 4, e16,m1,ta,mu
; LMULMAX1-NEXT: vslidedown.vi v27, v26, 4
; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
@ -985,25 +960,26 @@ define <16 x i64> @sextload_v16i16_v16i64(<16 x i16>* %x) {
; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
; LMULMAX1-NEXT: vsext.vf4 v15, v27
; LMULMAX1-NEXT: vsetivli a0, 2, e16,m1,ta,mu
; LMULMAX1-NEXT: vslidedown.vi v25, v25, 2
; LMULMAX1-NEXT: vslidedown.vi v27, v25, 2
; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
; LMULMAX1-NEXT: vsext.vf4 v9, v25
; LMULMAX1-NEXT: vsext.vf4 v9, v27
; LMULMAX1-NEXT: vsetivli a0, 2, e16,m1,ta,mu
; LMULMAX1-NEXT: vslidedown.vi v25, v26, 2
; LMULMAX1-NEXT: vslidedown.vi v27, v26, 2
; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
; LMULMAX1-NEXT: vsext.vf4 v13, v25
; LMULMAX1-NEXT: vsext.vf4 v13, v27
; LMULMAX1-NEXT: vsext.vf4 v8, v25
; LMULMAX1-NEXT: vsext.vf4 v12, v26
; LMULMAX1-NEXT: ret
;
; LMULMAX4-LABEL: sextload_v16i16_v16i64:
; LMULMAX4: # %bb.0:
; LMULMAX4-NEXT: vsetivli a1, 16, e16,m2,ta,mu
; LMULMAX4-NEXT: vle16.v v26, (a0)
; LMULMAX4-NEXT: vsetivli a0, 8, e64,m4,ta,mu
; LMULMAX4-NEXT: vsext.vf4 v8, v26
; LMULMAX4-NEXT: vsetivli a0, 8, e16,m2,ta,mu
; LMULMAX4-NEXT: vslidedown.vi v26, v26, 8
; LMULMAX4-NEXT: vslidedown.vi v28, v26, 8
; LMULMAX4-NEXT: vsetivli a0, 8, e64,m4,ta,mu
; LMULMAX4-NEXT: vsext.vf4 v12, v26
; LMULMAX4-NEXT: vsext.vf4 v12, v28
; LMULMAX4-NEXT: vsext.vf4 v8, v26
; LMULMAX4-NEXT: ret
%y = load <16 x i16>, <16 x i16>* %x
%z = sext <16 x i16> %y to <16 x i64>
@ -1017,8 +993,6 @@ define <16 x i64> @zextload_v16i16_v16i64(<16 x i16>* %x) {
; LMULMAX1-NEXT: vle16.v v25, (a0)
; LMULMAX1-NEXT: addi a0, a0, 16
; LMULMAX1-NEXT: vle16.v v26, (a0)
; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
; LMULMAX1-NEXT: vzext.vf4 v8, v25
; LMULMAX1-NEXT: vsetivli a0, 4, e16,m1,ta,mu
; LMULMAX1-NEXT: vslidedown.vi v27, v25, 4
; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
@ -1027,7 +1001,6 @@ define <16 x i64> @zextload_v16i16_v16i64(<16 x i16>* %x) {
; LMULMAX1-NEXT: vslidedown.vi v27, v27, 2
; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
; LMULMAX1-NEXT: vzext.vf4 v11, v27
; LMULMAX1-NEXT: vzext.vf4 v12, v26
; LMULMAX1-NEXT: vsetivli a0, 4, e16,m1,ta,mu
; LMULMAX1-NEXT: vslidedown.vi v27, v26, 4
; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
@ -1037,25 +1010,26 @@ define <16 x i64> @zextload_v16i16_v16i64(<16 x i16>* %x) {
; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
; LMULMAX1-NEXT: vzext.vf4 v15, v27
; LMULMAX1-NEXT: vsetivli a0, 2, e16,m1,ta,mu
; LMULMAX1-NEXT: vslidedown.vi v25, v25, 2
; LMULMAX1-NEXT: vslidedown.vi v27, v25, 2
; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
; LMULMAX1-NEXT: vzext.vf4 v9, v25
; LMULMAX1-NEXT: vzext.vf4 v9, v27
; LMULMAX1-NEXT: vsetivli a0, 2, e16,m1,ta,mu
; LMULMAX1-NEXT: vslidedown.vi v25, v26, 2
; LMULMAX1-NEXT: vslidedown.vi v27, v26, 2
; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
; LMULMAX1-NEXT: vzext.vf4 v13, v25
; LMULMAX1-NEXT: vzext.vf4 v13, v27
; LMULMAX1-NEXT: vzext.vf4 v8, v25
; LMULMAX1-NEXT: vzext.vf4 v12, v26
; LMULMAX1-NEXT: ret
;
; LMULMAX4-LABEL: zextload_v16i16_v16i64:
; LMULMAX4: # %bb.0:
; LMULMAX4-NEXT: vsetivli a1, 16, e16,m2,ta,mu
; LMULMAX4-NEXT: vle16.v v26, (a0)
; LMULMAX4-NEXT: vsetivli a0, 8, e64,m4,ta,mu
; LMULMAX4-NEXT: vzext.vf4 v8, v26
; LMULMAX4-NEXT: vsetivli a0, 8, e16,m2,ta,mu
; LMULMAX4-NEXT: vslidedown.vi v26, v26, 8
; LMULMAX4-NEXT: vslidedown.vi v28, v26, 8
; LMULMAX4-NEXT: vsetivli a0, 8, e64,m4,ta,mu
; LMULMAX4-NEXT: vzext.vf4 v12, v26
; LMULMAX4-NEXT: vzext.vf4 v12, v28
; LMULMAX4-NEXT: vzext.vf4 v8, v26
; LMULMAX4-NEXT: ret
%y = load <16 x i16>, <16 x i16>* %x
%z = zext <16 x i16> %y to <16 x i64>
@ -1149,12 +1123,11 @@ define <4 x i64> @sextload_v4i32_v4i64(<4 x i32>* %x) {
; LMULMAX1: # %bb.0:
; LMULMAX1-NEXT: vsetivli a1, 4, e32,m1,ta,mu
; LMULMAX1-NEXT: vle32.v v25, (a0)
; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
; LMULMAX1-NEXT: vsext.vf2 v8, v25
; LMULMAX1-NEXT: vsetivli a0, 2, e32,m1,ta,mu
; LMULMAX1-NEXT: vslidedown.vi v25, v25, 2
; LMULMAX1-NEXT: vslidedown.vi v26, v25, 2
; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
; LMULMAX1-NEXT: vsext.vf2 v9, v25
; LMULMAX1-NEXT: vsext.vf2 v9, v26
; LMULMAX1-NEXT: vsext.vf2 v8, v25
; LMULMAX1-NEXT: ret
;
; LMULMAX4-LABEL: sextload_v4i32_v4i64:
@ -1174,12 +1147,11 @@ define <4 x i64> @zextload_v4i32_v4i64(<4 x i32>* %x) {
; LMULMAX1: # %bb.0:
; LMULMAX1-NEXT: vsetivli a1, 4, e32,m1,ta,mu
; LMULMAX1-NEXT: vle32.v v25, (a0)
; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
; LMULMAX1-NEXT: vzext.vf2 v8, v25
; LMULMAX1-NEXT: vsetivli a0, 2, e32,m1,ta,mu
; LMULMAX1-NEXT: vslidedown.vi v25, v25, 2
; LMULMAX1-NEXT: vslidedown.vi v26, v25, 2
; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
; LMULMAX1-NEXT: vzext.vf2 v9, v25
; LMULMAX1-NEXT: vzext.vf2 v9, v26
; LMULMAX1-NEXT: vzext.vf2 v8, v25
; LMULMAX1-NEXT: ret
;
; LMULMAX4-LABEL: zextload_v4i32_v4i64:
@ -1264,17 +1236,16 @@ define <8 x i64> @sextload_v8i32_v8i64(<8 x i32>* %x) {
; LMULMAX1-NEXT: vle32.v v25, (a0)
; LMULMAX1-NEXT: addi a0, a0, 16
; LMULMAX1-NEXT: vle32.v v26, (a0)
; LMULMAX1-NEXT: vsetivli a0, 2, e32,m1,ta,mu
; LMULMAX1-NEXT: vslidedown.vi v27, v25, 2
; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
; LMULMAX1-NEXT: vsext.vf2 v9, v27
; LMULMAX1-NEXT: vsetivli a0, 2, e32,m1,ta,mu
; LMULMAX1-NEXT: vslidedown.vi v27, v26, 2
; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
; LMULMAX1-NEXT: vsext.vf2 v11, v27
; LMULMAX1-NEXT: vsext.vf2 v8, v25
; LMULMAX1-NEXT: vsetivli a0, 2, e32,m1,ta,mu
; LMULMAX1-NEXT: vslidedown.vi v25, v25, 2
; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
; LMULMAX1-NEXT: vsext.vf2 v9, v25
; LMULMAX1-NEXT: vsext.vf2 v10, v26
; LMULMAX1-NEXT: vsetivli a0, 2, e32,m1,ta,mu
; LMULMAX1-NEXT: vslidedown.vi v25, v26, 2
; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
; LMULMAX1-NEXT: vsext.vf2 v11, v25
; LMULMAX1-NEXT: ret
;
; LMULMAX4-LABEL: sextload_v8i32_v8i64:
@ -1296,17 +1267,16 @@ define <8 x i64> @zextload_v8i32_v8i64(<8 x i32>* %x) {
; LMULMAX1-NEXT: vle32.v v25, (a0)
; LMULMAX1-NEXT: addi a0, a0, 16
; LMULMAX1-NEXT: vle32.v v26, (a0)
; LMULMAX1-NEXT: vsetivli a0, 2, e32,m1,ta,mu
; LMULMAX1-NEXT: vslidedown.vi v27, v25, 2
; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
; LMULMAX1-NEXT: vzext.vf2 v9, v27
; LMULMAX1-NEXT: vsetivli a0, 2, e32,m1,ta,mu
; LMULMAX1-NEXT: vslidedown.vi v27, v26, 2
; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
; LMULMAX1-NEXT: vzext.vf2 v11, v27
; LMULMAX1-NEXT: vzext.vf2 v8, v25
; LMULMAX1-NEXT: vsetivli a0, 2, e32,m1,ta,mu
; LMULMAX1-NEXT: vslidedown.vi v25, v25, 2
; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
; LMULMAX1-NEXT: vzext.vf2 v9, v25
; LMULMAX1-NEXT: vzext.vf2 v10, v26
; LMULMAX1-NEXT: vsetivli a0, 2, e32,m1,ta,mu
; LMULMAX1-NEXT: vslidedown.vi v25, v26, 2
; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
; LMULMAX1-NEXT: vzext.vf2 v11, v25
; LMULMAX1-NEXT: ret
;
; LMULMAX4-LABEL: zextload_v8i32_v8i64:
@ -1424,39 +1394,37 @@ define <16 x i64> @sextload_v16i32_v16i64(<16 x i32>* %x) {
; LMULMAX1-NEXT: vle32.v v27, (a0)
; LMULMAX1-NEXT: addi a0, a0, 16
; LMULMAX1-NEXT: vle32.v v28, (a0)
; LMULMAX1-NEXT: vsetivli a0, 2, e32,m1,ta,mu
; LMULMAX1-NEXT: vslidedown.vi v29, v27, 2
; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
; LMULMAX1-NEXT: vsext.vf2 v9, v29
; LMULMAX1-NEXT: vsetivli a0, 2, e32,m1,ta,mu
; LMULMAX1-NEXT: vslidedown.vi v29, v28, 2
; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
; LMULMAX1-NEXT: vsext.vf2 v11, v29
; LMULMAX1-NEXT: vsetivli a0, 2, e32,m1,ta,mu
; LMULMAX1-NEXT: vslidedown.vi v29, v26, 2
; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
; LMULMAX1-NEXT: vsext.vf2 v13, v29
; LMULMAX1-NEXT: vsetivli a0, 2, e32,m1,ta,mu
; LMULMAX1-NEXT: vslidedown.vi v29, v25, 2
; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
; LMULMAX1-NEXT: vsext.vf2 v15, v29
; LMULMAX1-NEXT: vsext.vf2 v8, v27
; LMULMAX1-NEXT: vsetivli a0, 2, e32,m1,ta,mu
; LMULMAX1-NEXT: vslidedown.vi v27, v27, 2
; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
; LMULMAX1-NEXT: vsext.vf2 v9, v27
; LMULMAX1-NEXT: vsext.vf2 v10, v28
; LMULMAX1-NEXT: vsetivli a0, 2, e32,m1,ta,mu
; LMULMAX1-NEXT: vslidedown.vi v27, v28, 2
; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
; LMULMAX1-NEXT: vsext.vf2 v11, v27
; LMULMAX1-NEXT: vsext.vf2 v12, v26
; LMULMAX1-NEXT: vsetivli a0, 2, e32,m1,ta,mu
; LMULMAX1-NEXT: vslidedown.vi v26, v26, 2
; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
; LMULMAX1-NEXT: vsext.vf2 v13, v26
; LMULMAX1-NEXT: vsext.vf2 v14, v25
; LMULMAX1-NEXT: vsetivli a0, 2, e32,m1,ta,mu
; LMULMAX1-NEXT: vslidedown.vi v25, v25, 2
; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
; LMULMAX1-NEXT: vsext.vf2 v15, v25
; LMULMAX1-NEXT: ret
;
; LMULMAX4-LABEL: sextload_v16i32_v16i64:
; LMULMAX4: # %bb.0:
; LMULMAX4-NEXT: vsetivli a1, 16, e32,m4,ta,mu
; LMULMAX4-NEXT: vle32.v v28, (a0)
; LMULMAX4-NEXT: vsetivli a0, 8, e64,m4,ta,mu
; LMULMAX4-NEXT: vsext.vf2 v8, v28
; LMULMAX4-NEXT: vsetivli a0, 8, e32,m4,ta,mu
; LMULMAX4-NEXT: vslidedown.vi v28, v28, 8
; LMULMAX4-NEXT: vslidedown.vi v8, v28, 8
; LMULMAX4-NEXT: vsetivli a0, 8, e64,m4,ta,mu
; LMULMAX4-NEXT: vsext.vf2 v12, v28
; LMULMAX4-NEXT: vsext.vf2 v12, v8
; LMULMAX4-NEXT: vsext.vf2 v8, v28
; LMULMAX4-NEXT: ret
%y = load <16 x i32>, <16 x i32>* %x
%z = sext <16 x i32> %y to <16 x i64>
@ -1474,39 +1442,37 @@ define <16 x i64> @zextload_v16i32_v16i64(<16 x i32>* %x) {
; LMULMAX1-NEXT: vle32.v v27, (a0)
; LMULMAX1-NEXT: addi a0, a0, 16
; LMULMAX1-NEXT: vle32.v v28, (a0)
; LMULMAX1-NEXT: vsetivli a0, 2, e32,m1,ta,mu
; LMULMAX1-NEXT: vslidedown.vi v29, v27, 2
; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
; LMULMAX1-NEXT: vzext.vf2 v9, v29
; LMULMAX1-NEXT: vsetivli a0, 2, e32,m1,ta,mu
; LMULMAX1-NEXT: vslidedown.vi v29, v28, 2
; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
; LMULMAX1-NEXT: vzext.vf2 v11, v29
; LMULMAX1-NEXT: vsetivli a0, 2, e32,m1,ta,mu
; LMULMAX1-NEXT: vslidedown.vi v29, v26, 2
; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
; LMULMAX1-NEXT: vzext.vf2 v13, v29
; LMULMAX1-NEXT: vsetivli a0, 2, e32,m1,ta,mu
; LMULMAX1-NEXT: vslidedown.vi v29, v25, 2
; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
; LMULMAX1-NEXT: vzext.vf2 v15, v29
; LMULMAX1-NEXT: vzext.vf2 v8, v27
; LMULMAX1-NEXT: vsetivli a0, 2, e32,m1,ta,mu
; LMULMAX1-NEXT: vslidedown.vi v27, v27, 2
; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
; LMULMAX1-NEXT: vzext.vf2 v9, v27
; LMULMAX1-NEXT: vzext.vf2 v10, v28
; LMULMAX1-NEXT: vsetivli a0, 2, e32,m1,ta,mu
; LMULMAX1-NEXT: vslidedown.vi v27, v28, 2
; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
; LMULMAX1-NEXT: vzext.vf2 v11, v27
; LMULMAX1-NEXT: vzext.vf2 v12, v26
; LMULMAX1-NEXT: vsetivli a0, 2, e32,m1,ta,mu
; LMULMAX1-NEXT: vslidedown.vi v26, v26, 2
; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
; LMULMAX1-NEXT: vzext.vf2 v13, v26
; LMULMAX1-NEXT: vzext.vf2 v14, v25
; LMULMAX1-NEXT: vsetivli a0, 2, e32,m1,ta,mu
; LMULMAX1-NEXT: vslidedown.vi v25, v25, 2
; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
; LMULMAX1-NEXT: vzext.vf2 v15, v25
; LMULMAX1-NEXT: ret
;
; LMULMAX4-LABEL: zextload_v16i32_v16i64:
; LMULMAX4: # %bb.0:
; LMULMAX4-NEXT: vsetivli a1, 16, e32,m4,ta,mu
; LMULMAX4-NEXT: vle32.v v28, (a0)
; LMULMAX4-NEXT: vsetivli a0, 8, e64,m4,ta,mu
; LMULMAX4-NEXT: vzext.vf2 v8, v28
; LMULMAX4-NEXT: vsetivli a0, 8, e32,m4,ta,mu
; LMULMAX4-NEXT: vslidedown.vi v28, v28, 8
; LMULMAX4-NEXT: vslidedown.vi v8, v28, 8
; LMULMAX4-NEXT: vsetivli a0, 8, e64,m4,ta,mu
; LMULMAX4-NEXT: vzext.vf2 v12, v28
; LMULMAX4-NEXT: vzext.vf2 v12, v8
; LMULMAX4-NEXT: vzext.vf2 v8, v28
; LMULMAX4-NEXT: ret
%y = load <16 x i32>, <16 x i32>* %x
%z = zext <16 x i32> %y to <16 x i64>
@ -2013,19 +1979,19 @@ define void @truncstore_v16i64_v16i16(<16 x i64> %x, <16 x i16>* %z) {
; LMULMAX4-LABEL: truncstore_v16i64_v16i16:
; LMULMAX4: # %bb.0:
; LMULMAX4-NEXT: vsetivli a1, 8, e32,m2,ta,mu
; LMULMAX4-NEXT: vnsrl.wi v26, v8, 0
; LMULMAX4-NEXT: vnsrl.wi v26, v12, 0
; LMULMAX4-NEXT: vsetivli a1, 8, e16,m1,ta,mu
; LMULMAX4-NEXT: vnsrl.wi v28, v26, 0
; LMULMAX4-NEXT: vsetivli a1, 8, e32,m2,ta,mu
; LMULMAX4-NEXT: vnsrl.wi v26, v8, 0
; LMULMAX4-NEXT: vsetivli a1, 8, e16,m1,ta,mu
; LMULMAX4-NEXT: vnsrl.wi v30, v26, 0
; LMULMAX4-NEXT: vsetivli a1, 16, e16,m2,ta,mu
; LMULMAX4-NEXT: vmv.v.i v26, 0
; LMULMAX4-NEXT: vsetivli a1, 8, e16,m2,tu,mu
; LMULMAX4-NEXT: vslideup.vi v26, v28, 0
; LMULMAX4-NEXT: vsetivli a1, 8, e32,m2,ta,mu
; LMULMAX4-NEXT: vnsrl.wi v28, v12, 0
; LMULMAX4-NEXT: vsetivli a1, 8, e16,m1,ta,mu
; LMULMAX4-NEXT: vnsrl.wi v30, v28, 0
; LMULMAX4-NEXT: vslideup.vi v26, v30, 0
; LMULMAX4-NEXT: vsetivli a1, 16, e16,m2,tu,mu
; LMULMAX4-NEXT: vslideup.vi v26, v30, 8
; LMULMAX4-NEXT: vslideup.vi v26, v28, 8
; LMULMAX4-NEXT: vsetivli a1, 16, e16,m2,ta,mu
; LMULMAX4-NEXT: vse16.v v26, (a0)
; LMULMAX4-NEXT: ret
@ -2087,13 +2053,12 @@ define void @truncstore_v16i64_v16i32(<16 x i64> %x, <16 x i32>* %z) {
; LMULMAX4-LABEL: truncstore_v16i64_v16i32:
; LMULMAX4: # %bb.0:
; LMULMAX4-NEXT: vsetivli a1, 8, e32,m2,ta,mu
; LMULMAX4-NEXT: vnsrl.wi v28, v8, 0
; LMULMAX4-NEXT: vnsrl.wi v28, v12, 0
; LMULMAX4-NEXT: vnsrl.wi v12, v8, 0
; LMULMAX4-NEXT: vsetivli a1, 16, e32,m4,ta,mu
; LMULMAX4-NEXT: vmv.v.i v8, 0
; LMULMAX4-NEXT: vsetivli a1, 8, e32,m4,tu,mu
; LMULMAX4-NEXT: vslideup.vi v8, v28, 0
; LMULMAX4-NEXT: vsetivli a1, 8, e32,m2,ta,mu
; LMULMAX4-NEXT: vnsrl.wi v28, v12, 0
; LMULMAX4-NEXT: vslideup.vi v8, v12, 0
; LMULMAX4-NEXT: vsetivli a1, 16, e32,m4,tu,mu
; LMULMAX4-NEXT: vslideup.vi v8, v28, 8
; LMULMAX4-NEXT: vsetivli a1, 16, e32,m4,ta,mu

@ -76,11 +76,11 @@ define i64 @extractelt_v2i64(<2 x i64>* %x) nounwind {
; RV32: # %bb.0:
; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
; RV32-NEXT: vle64.v v25, (a0)
; RV32-NEXT: addi a0, zero, 32
; RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu
; RV32-NEXT: vsrl.vx v26, v25, a0
; RV32-NEXT: vmv.x.s a1, v26
; RV32-NEXT: vmv.x.s a0, v25
; RV32-NEXT: addi a1, zero, 32
; RV32-NEXT: vsetivli a2, 1, e64,m1,ta,mu
; RV32-NEXT: vsrl.vx v25, v25, a1
; RV32-NEXT: vmv.x.s a1, v25
; RV32-NEXT: ret
;
; RV64-LABEL: extractelt_v2i64:

@ -52,12 +52,11 @@ define void @fpext_v8f16_v8f32(<8 x half>* %x, <8 x float>* %y) {
; LMULMAX1: # %bb.0:
; LMULMAX1-NEXT: vsetivli a2, 8, e16,m1,ta,mu
; LMULMAX1-NEXT: vle16.v v25, (a0)
; LMULMAX1-NEXT: vsetivli a0, 4, e16,mf2,ta,mu
; LMULMAX1-NEXT: vfwcvt.f.f.v v26, v25
; LMULMAX1-NEXT: vsetivli a0, 4, e16,m1,ta,mu
; LMULMAX1-NEXT: vslidedown.vi v25, v25, 4
; LMULMAX1-NEXT: vslidedown.vi v26, v25, 4
; LMULMAX1-NEXT: vsetivli a0, 4, e16,mf2,ta,mu
; LMULMAX1-NEXT: vfwcvt.f.f.v v27, v25
; LMULMAX1-NEXT: vfwcvt.f.f.v v27, v26
; LMULMAX1-NEXT: vfwcvt.f.f.v v26, v25
; LMULMAX1-NEXT: addi a0, a1, 16
; LMULMAX1-NEXT: vsetivli a2, 4, e32,m1,ta,mu
; LMULMAX1-NEXT: vse32.v v27, (a0)
@ -91,28 +90,28 @@ define void @fpext_v8f16_v8f64(<8 x half>* %x, <8 x double>* %y) {
; LMULMAX1-NEXT: vfwcvt.f.f.v v27, v26
; LMULMAX1-NEXT: vsetivli a0, 2, e32,mf2,ta,mu
; LMULMAX1-NEXT: vfwcvt.f.f.v v26, v27
; LMULMAX1-NEXT: vsetivli a0, 2, e16,mf4,ta,mu
; LMULMAX1-NEXT: vfwcvt.f.f.v v27, v25
; LMULMAX1-NEXT: vsetivli a0, 2, e32,mf2,ta,mu
; LMULMAX1-NEXT: vfwcvt.f.f.v v28, v27
; LMULMAX1-NEXT: vsetivli a0, 4, e16,m1,ta,mu
; LMULMAX1-NEXT: vslidedown.vi v25, v25, 4
; LMULMAX1-NEXT: vslidedown.vi v27, v25, 4
; LMULMAX1-NEXT: vsetivli a0, 2, e16,mf4,ta,mu
; LMULMAX1-NEXT: vfwcvt.f.f.v v27, v25
; LMULMAX1-NEXT: vfwcvt.f.f.v v28, v27
; LMULMAX1-NEXT: vsetivli a0, 2, e32,mf2,ta,mu
; LMULMAX1-NEXT: vfwcvt.f.f.v v29, v27
; LMULMAX1-NEXT: vfwcvt.f.f.v v29, v28
; LMULMAX1-NEXT: vsetivli a0, 2, e16,m1,ta,mu
; LMULMAX1-NEXT: vslidedown.vi v25, v25, 2
; LMULMAX1-NEXT: vslidedown.vi v27, v27, 2
; LMULMAX1-NEXT: vsetivli a0, 2, e16,mf4,ta,mu
; LMULMAX1-NEXT: vfwcvt.f.f.v v27, v25
; LMULMAX1-NEXT: vfwcvt.f.f.v v28, v27
; LMULMAX1-NEXT: vsetivli a0, 2, e32,mf2,ta,mu
; LMULMAX1-NEXT: vfwcvt.f.f.v v25, v27
; LMULMAX1-NEXT: vfwcvt.f.f.v v27, v28
; LMULMAX1-NEXT: vsetivli a0, 2, e16,mf4,ta,mu
; LMULMAX1-NEXT: vfwcvt.f.f.v v28, v25
; LMULMAX1-NEXT: vsetivli a0, 2, e32,mf2,ta,mu
; LMULMAX1-NEXT: vfwcvt.f.f.v v25, v28
; LMULMAX1-NEXT: addi a0, a1, 48
; LMULMAX1-NEXT: vsetivli a2, 2, e64,m1,ta,mu
; LMULMAX1-NEXT: vse64.v v25, (a0)
; LMULMAX1-NEXT: vse64.v v27, (a0)
; LMULMAX1-NEXT: addi a0, a1, 32
; LMULMAX1-NEXT: vse64.v v29, (a0)
; LMULMAX1-NEXT: vse64.v v28, (a1)
; LMULMAX1-NEXT: vse64.v v25, (a1)
; LMULMAX1-NEXT: addi a0, a1, 16
; LMULMAX1-NEXT: vse64.v v26, (a0)
; LMULMAX1-NEXT: ret

@ -1528,10 +1528,10 @@ define void @fcmp_ord_fv_v4f16(<4 x half>* %x, half %y, <4 x i1>* %z) {
; CHECK-NEXT: vsetivli a2, 4, e16,m1,ta,mu
; CHECK-NEXT: vle16.v v25, (a0)
; CHECK-NEXT: vfmv.v.f v26, fa0
; CHECK-NEXT: vmfeq.vv v27, v25, v25
; CHECK-NEXT: vmfeq.vf v25, v26, fa0
; CHECK-NEXT: vmfeq.vf v27, v26, fa0
; CHECK-NEXT: vmfeq.vv v26, v25, v25
; CHECK-NEXT: vsetivli a0, 4, e8,m1,ta,mu
; CHECK-NEXT: vmand.mm v0, v25, v27
; CHECK-NEXT: vmand.mm v0, v27, v26
; CHECK-NEXT: vmv.v.i v25, 0
; CHECK-NEXT: vmerge.vim v25, v25, 1, v0
; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu
@ -1556,10 +1556,10 @@ define void @fcmp_uno_fv_v4f16(<2 x half>* %x, half %y, <2 x i1>* %z) {
; CHECK-NEXT: vsetivli a2, 2, e16,m1,ta,mu
; CHECK-NEXT: vle16.v v25, (a0)
; CHECK-NEXT: vfmv.v.f v26, fa0
; CHECK-NEXT: vmfne.vv v27, v25, v25
; CHECK-NEXT: vmfne.vf v25, v26, fa0
; CHECK-NEXT: vmfne.vf v27, v26, fa0
; CHECK-NEXT: vmfne.vv v26, v25, v25
; CHECK-NEXT: vsetivli a0, 2, e8,m1,ta,mu
; CHECK-NEXT: vmor.mm v0, v25, v27
; CHECK-NEXT: vmor.mm v0, v27, v26
; CHECK-NEXT: vmv.v.i v25, 0
; CHECK-NEXT: vmerge.vim v25, v25, 1, v0
; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu

@ -111,21 +111,21 @@ define <4 x double> @vrgather_permute_shuffle_uv_v4f64(<4 x double> %x) {
define <4 x double> @vrgather_shuffle_vv_v4f64(<4 x double> %x, <4 x double> %y) {
; RV32-LABEL: vrgather_shuffle_vv_v4f64:
; RV32: # %bb.0:
; RV32-NEXT: addi a0, zero, 1
; RV32-NEXT: addi a1, zero, 8
; RV32-NEXT: vsetivli a2, 1, e8,m1,ta,mu
; RV32-NEXT: vmv.s.x v0, a1
; RV32-NEXT: vsetivli a1, 4, e16,m1,ta,mu
; RV32-NEXT: vmv.s.x v25, a0
; RV32-NEXT: vmv.v.i v28, 0
; RV32-NEXT: vsetivli a0, 4, e16,m1,tu,mu
; RV32-NEXT: vslideup.vi v28, v25, 3
; RV32-NEXT: lui a0, %hi(.LCPI6_0)
; RV32-NEXT: addi a0, a0, %lo(.LCPI6_0)
; RV32-NEXT: vsetivli a1, 4, e16,m1,ta,mu
; RV32-NEXT: vle16.v v25, (a0)
; RV32-NEXT: vsetivli a0, 4, e64,m2,ta,mu
; RV32-NEXT: vrgatherei16.vv v26, v8, v25
; RV32-NEXT: addi a0, zero, 8
; RV32-NEXT: vsetivli a1, 1, e8,m1,ta,mu
; RV32-NEXT: vmv.s.x v0, a0
; RV32-NEXT: addi a0, zero, 1
; RV32-NEXT: vsetivli a1, 4, e16,m1,ta,mu
; RV32-NEXT: vmv.s.x v25, a0
; RV32-NEXT: vmv.v.i v28, 0
; RV32-NEXT: vsetivli a0, 4, e16,m1,tu,mu
; RV32-NEXT: vslideup.vi v28, v25, 3
; RV32-NEXT: vsetivli a0, 4, e64,m2,tu,mu
; RV32-NEXT: vrgatherei16.vv v26, v10, v28, v0.t
; RV32-NEXT: vmv2r.v v8, v26
@ -139,14 +139,14 @@ define <4 x double> @vrgather_shuffle_vv_v4f64(<4 x double> %x, <4 x double> %y)
; RV64-NEXT: vmv.v.i v28, 0
; RV64-NEXT: vsetivli a0, 4, e64,m2,tu,mu
; RV64-NEXT: vslideup.vi v28, v26, 3
; RV64-NEXT: addi a0, zero, 8
; RV64-NEXT: vsetivli a1, 1, e8,m1,ta,mu
; RV64-NEXT: vmv.s.x v0, a0
; RV64-NEXT: lui a0, %hi(.LCPI6_0)
; RV64-NEXT: addi a0, a0, %lo(.LCPI6_0)
; RV64-NEXT: vsetivli a1, 4, e64,m2,ta,mu
; RV64-NEXT: vle64.v v30, (a0)
; RV64-NEXT: vrgather.vv v26, v8, v30
; RV64-NEXT: addi a0, zero, 8
; RV64-NEXT: vsetivli a1, 1, e8,m1,ta,mu
; RV64-NEXT: vmv.s.x v0, a0
; RV64-NEXT: vsetivli a0, 4, e64,m2,tu,mu
; RV64-NEXT: vrgather.vv v26, v10, v28, v0.t
; RV64-NEXT: vmv2r.v v8, v26

@ -160,8 +160,8 @@ define void @splat_zero_16f16(<16 x half>* %x) {
; LMULMAX1: # %bb.0:
; LMULMAX1-NEXT: vsetivli a1, 8, e16,m1,ta,mu
; LMULMAX1-NEXT: vmv.v.i v25, 0
; LMULMAX1-NEXT: addi a1, a0, 16
; LMULMAX1-NEXT: vse16.v v25, (a1)
; LMULMAX1-NEXT: vse16.v v25, (a0)
; LMULMAX1-NEXT: addi a0, a0, 16
; LMULMAX1-NEXT: vse16.v v25, (a0)
; LMULMAX1-NEXT: ret
%a = insertelement <16 x half> undef, half 0.0, i32 0
@ -182,8 +182,8 @@ define void @splat_zero_v8f32(<8 x float>* %x) {
; LMULMAX1: # %bb.0:
; LMULMAX1-NEXT: vsetivli a1, 4, e32,m1,ta,mu
; LMULMAX1-NEXT: vmv.v.i v25, 0
; LMULMAX1-NEXT: addi a1, a0, 16
; LMULMAX1-NEXT: vse32.v v25, (a1)
; LMULMAX1-NEXT: vse32.v v25, (a0)
; LMULMAX1-NEXT: addi a0, a0, 16
; LMULMAX1-NEXT: vse32.v v25, (a0)
; LMULMAX1-NEXT: ret
%a = insertelement <8 x float> undef, float 0.0, i32 0
@ -204,8 +204,8 @@ define void @splat_zero_v4f64(<4 x double>* %x) {
; LMULMAX1: # %bb.0:
; LMULMAX1-NEXT: vsetivli a1, 2, e64,m1,ta,mu
; LMULMAX1-NEXT: vmv.v.i v25, 0
; LMULMAX1-NEXT: addi a1, a0, 16
; LMULMAX1-NEXT: vse64.v v25, (a1)
; LMULMAX1-NEXT: vse64.v v25, (a0)
; LMULMAX1-NEXT: addi a0, a0, 16
; LMULMAX1-NEXT: vse64.v v25, (a0)
; LMULMAX1-NEXT: ret
%a = insertelement <4 x double> undef, double 0.0, i32 0

@ -134,17 +134,16 @@ define void @fp2si_v8f32_v8i64(<8 x float>* %x, <8 x i64>* %y) {
; LMULMAX1-NEXT: addi a2, a0, 16
; LMULMAX1-NEXT: vle32.v v25, (a2)
; LMULMAX1-NEXT: vle32.v v26, (a0)
; LMULMAX1-NEXT: vsetivli a0, 2, e32,m1,ta,mu
; LMULMAX1-NEXT: vslidedown.vi v27, v25, 2
; LMULMAX1-NEXT: vsetivli a0, 2, e32,mf2,ta,mu
; LMULMAX1-NEXT: vfwcvt.rtz.x.f.v v28, v27
; LMULMAX1-NEXT: vsetivli a0, 2, e32,m1,ta,mu
; LMULMAX1-NEXT: vslidedown.vi v27, v26, 2
; LMULMAX1-NEXT: vsetivli a0, 2, e32,mf2,ta,mu
; LMULMAX1-NEXT: vfwcvt.rtz.x.f.v v29, v27
; LMULMAX1-NEXT: vfwcvt.rtz.x.f.v v27, v25
; LMULMAX1-NEXT: vsetivli a0, 2, e32,m1,ta,mu
; LMULMAX1-NEXT: vslidedown.vi v25, v25, 2
; LMULMAX1-NEXT: vsetivli a0, 2, e32,mf2,ta,mu
; LMULMAX1-NEXT: vfwcvt.rtz.x.f.v v28, v25
; LMULMAX1-NEXT: vfwcvt.rtz.x.f.v v25, v26
; LMULMAX1-NEXT: vsetivli a0, 2, e32,m1,ta,mu
; LMULMAX1-NEXT: vslidedown.vi v26, v26, 2
; LMULMAX1-NEXT: vsetivli a0, 2, e32,mf2,ta,mu
; LMULMAX1-NEXT: vfwcvt.rtz.x.f.v v29, v26
; LMULMAX1-NEXT: addi a0, a1, 16
; LMULMAX1-NEXT: vsetivli a2, 2, e64,m1,ta,mu
; LMULMAX1-NEXT: vse64.v v29, (a0)
@ -176,17 +175,16 @@ define void @fp2ui_v8f32_v8i64(<8 x float>* %x, <8 x i64>* %y) {
; LMULMAX1-NEXT: addi a2, a0, 16
; LMULMAX1-NEXT: vle32.v v25, (a2)
; LMULMAX1-NEXT: vle32.v v26, (a0)
; LMULMAX1-NEXT: vsetivli a0, 2, e32,m1,ta,mu
; LMULMAX1-NEXT: vslidedown.vi v27, v25, 2
; LMULMAX1-NEXT: vsetivli a0, 2, e32,mf2,ta,mu
; LMULMAX1-NEXT: vfwcvt.rtz.xu.f.v v28, v27
; LMULMAX1-NEXT: vsetivli a0, 2, e32,m1,ta,mu
; LMULMAX1-NEXT: vslidedown.vi v27, v26, 2
; LMULMAX1-NEXT: vsetivli a0, 2, e32,mf2,ta,mu
; LMULMAX1-NEXT: vfwcvt.rtz.xu.f.v v29, v27
; LMULMAX1-NEXT: vfwcvt.rtz.xu.f.v v27, v25
; LMULMAX1-NEXT: vsetivli a0, 2, e32,m1,ta,mu
; LMULMAX1-NEXT: vslidedown.vi v25, v25, 2
; LMULMAX1-NEXT: vsetivli a0, 2, e32,mf2,ta,mu
; LMULMAX1-NEXT: vfwcvt.rtz.xu.f.v v28, v25
; LMULMAX1-NEXT: vfwcvt.rtz.xu.f.v v25, v26
; LMULMAX1-NEXT: vsetivli a0, 2, e32,m1,ta,mu
; LMULMAX1-NEXT: vslidedown.vi v26, v26, 2
; LMULMAX1-NEXT: vsetivli a0, 2, e32,mf2,ta,mu
; LMULMAX1-NEXT: vfwcvt.rtz.xu.f.v v29, v26
; LMULMAX1-NEXT: addi a0, a1, 16
; LMULMAX1-NEXT: vsetivli a2, 2, e64,m1,ta,mu
; LMULMAX1-NEXT: vse64.v v29, (a0)

@ -138,23 +138,23 @@ define void @si2fp_v8i16_v8f64(<8 x i16>* %x, <8 x double>* %y) {
; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
; LMULMAX1-NEXT: vsext.vf4 v27, v26
; LMULMAX1-NEXT: vfcvt.f.x.v v26, v27
; LMULMAX1-NEXT: vsext.vf4 v27, v25
; LMULMAX1-NEXT: vfcvt.f.x.v v27, v27
; LMULMAX1-NEXT: vsetivli a0, 4, e16,m1,ta,mu
; LMULMAX1-NEXT: vslidedown.vi v25, v25, 4
; LMULMAX1-NEXT: vslidedown.vi v27, v25, 4
; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
; LMULMAX1-NEXT: vsext.vf4 v28, v25
; LMULMAX1-NEXT: vsext.vf4 v28, v27
; LMULMAX1-NEXT: vfcvt.f.x.v v28, v28
; LMULMAX1-NEXT: vsetivli a0, 2, e16,m1,ta,mu
; LMULMAX1-NEXT: vslidedown.vi v25, v25, 2
; LMULMAX1-NEXT: vslidedown.vi v27, v27, 2
; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
; LMULMAX1-NEXT: vsext.vf4 v29, v27
; LMULMAX1-NEXT: vfcvt.f.x.v v27, v29
; LMULMAX1-NEXT: vsext.vf4 v29, v25
; LMULMAX1-NEXT: vfcvt.f.x.v v25, v29
; LMULMAX1-NEXT: addi a0, a1, 48
; LMULMAX1-NEXT: vse64.v v25, (a0)
; LMULMAX1-NEXT: vse64.v v27, (a0)
; LMULMAX1-NEXT: addi a0, a1, 32
; LMULMAX1-NEXT: vse64.v v28, (a0)
; LMULMAX1-NEXT: vse64.v v27, (a1)
; LMULMAX1-NEXT: vse64.v v25, (a1)
; LMULMAX1-NEXT: addi a0, a1, 16
; LMULMAX1-NEXT: vse64.v v26, (a0)
; LMULMAX1-NEXT: ret
@ -184,23 +184,23 @@ define void @ui2fp_v8i16_v8f64(<8 x i16>* %x, <8 x double>* %y) {
; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
; LMULMAX1-NEXT: vzext.vf4 v27, v26
; LMULMAX1-NEXT: vfcvt.f.xu.v v26, v27
; LMULMAX1-NEXT: vzext.vf4 v27, v25
; LMULMAX1-NEXT: vfcvt.f.xu.v v27, v27
; LMULMAX1-NEXT: vsetivli a0, 4, e16,m1,ta,mu
; LMULMAX1-NEXT: vslidedown.vi v25, v25, 4
; LMULMAX1-NEXT: vslidedown.vi v27, v25, 4
; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
; LMULMAX1-NEXT: vzext.vf4 v28, v25
; LMULMAX1-NEXT: vzext.vf4 v28, v27
; LMULMAX1-NEXT: vfcvt.f.xu.v v28, v28
; LMULMAX1-NEXT: vsetivli a0, 2, e16,m1,ta,mu
; LMULMAX1-NEXT: vslidedown.vi v25, v25, 2
; LMULMAX1-NEXT: vslidedown.vi v27, v27, 2
; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
; LMULMAX1-NEXT: vzext.vf4 v29, v27
; LMULMAX1-NEXT: vfcvt.f.xu.v v27, v29
; LMULMAX1-NEXT: vzext.vf4 v29, v25
; LMULMAX1-NEXT: vfcvt.f.xu.v v25, v29
; LMULMAX1-NEXT: addi a0, a1, 48
; LMULMAX1-NEXT: vse64.v v25, (a0)
; LMULMAX1-NEXT: vse64.v v27, (a0)
; LMULMAX1-NEXT: addi a0, a1, 32
; LMULMAX1-NEXT: vse64.v v28, (a0)
; LMULMAX1-NEXT: vse64.v v27, (a1)
; LMULMAX1-NEXT: vse64.v v25, (a1)
; LMULMAX1-NEXT: addi a0, a1, 16
; LMULMAX1-NEXT: vse64.v v26, (a0)
; LMULMAX1-NEXT: ret

@ -49,10 +49,10 @@ define void @insertelt_v3i64(<3 x i64>* %x, i64 %y) {
; RV32-NEXT: vmv.v.i v28, 0
; RV32-NEXT: vsetivli a3, 2, e64,m2,tu,mu
; RV32-NEXT: vslideup.vi v28, v26, 0
; RV32-NEXT: addi a3, a0, 20
; RV32-NEXT: vsetivli a4, 4, e32,m1,ta,mu
; RV32-NEXT: vlse32.v v26, (a3), zero
; RV32-NEXT: lw a3, 16(a0)
; RV32-NEXT: addi a4, a0, 20
; RV32-NEXT: vsetivli a5, 4, e32,m1,ta,mu
; RV32-NEXT: vlse32.v v26, (a4), zero
; RV32-NEXT: vmv.s.x v26, a3
; RV32-NEXT: vsetivli a3, 4, e64,m2,tu,mu
; RV32-NEXT: vslideup.vi v28, v26, 2
@ -62,10 +62,10 @@ define void @insertelt_v3i64(<3 x i64>* %x, i64 %y) {
; RV32-NEXT: vslide1up.vx v26, v30, a1
; RV32-NEXT: vsetivli a3, 3, e64,m2,tu,mu
; RV32-NEXT: vslideup.vi v28, v26, 2
; RV32-NEXT: vsetivli a3, 2, e64,m1,ta,mu
; RV32-NEXT: vse64.v v28, (a0)
; RV32-NEXT: sw a1, 16(a0)
; RV32-NEXT: sw a2, 20(a0)
; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
; RV32-NEXT: vse64.v v28, (a0)
; RV32-NEXT: ret
;
; RV64-LABEL: insertelt_v3i64:

@ -265,6 +265,8 @@ define void @buildvec_seq2_v16i8_v2i64(<16 x i8>* %x) {
define void @buildvec_seq_v9i8(<9 x i8>* %x) {
; RV32-LABEL: buildvec_seq_v9i8:
; RV32: # %bb.0:
; RV32-NEXT: addi a1, zero, 3
; RV32-NEXT: sb a1, 8(a0)
; RV32-NEXT: addi a1, zero, 73
; RV32-NEXT: vsetivli a2, 1, e8,m1,ta,mu
; RV32-NEXT: vmv.s.x v0, a1
@ -277,8 +279,6 @@ define void @buildvec_seq_v9i8(<9 x i8>* %x) {
; RV32-NEXT: vsetivli a1, 8, e8,m1,ta,mu
; RV32-NEXT: vmerge.vim v25, v25, 3, v0
; RV32-NEXT: vse8.v v25, (a0)
; RV32-NEXT: addi a1, zero, 3
; RV32-NEXT: sb a1, 8(a0)
; RV32-NEXT: ret
;
; RV64-LABEL: buildvec_seq_v9i8:

@ -59,12 +59,11 @@ define void @sext_v8i8_v8i32(<8 x i8>* %x, <8 x i32>* %z) {
; LMULMAX1: # %bb.0:
; LMULMAX1-NEXT: vsetivli a2, 8, e8,m1,ta,mu
; LMULMAX1-NEXT: vle8.v v25, (a0)
; LMULMAX1-NEXT: vsetivli a0, 4, e32,m1,ta,mu
; LMULMAX1-NEXT: vsext.vf4 v26, v25
; LMULMAX1-NEXT: vsetivli a0, 4, e8,m1,ta,mu
; LMULMAX1-NEXT: vslidedown.vi v25, v25, 4
; LMULMAX1-NEXT: vslidedown.vi v26, v25, 4
; LMULMAX1-NEXT: vsetivli a0, 4, e32,m1,ta,mu
; LMULMAX1-NEXT: vsext.vf4 v27, v25
; LMULMAX1-NEXT: vsext.vf4 v27, v26
; LMULMAX1-NEXT: vsext.vf4 v26, v25
; LMULMAX1-NEXT: addi a0, a1, 16
; LMULMAX1-NEXT: vse32.v v27, (a0)
; LMULMAX1-NEXT: vse32.v v26, (a1)
@ -126,24 +125,24 @@ define void @sext_v32i8_v32i32(<32 x i8>* %x, <32 x i32>* %z) {
; LMULMAX1-NEXT: vslidedown.vi v27, v26, 4
; LMULMAX1-NEXT: vsetivli a0, 4, e32,m1,ta,mu
; LMULMAX1-NEXT: vsext.vf4 v29, v27
; LMULMAX1-NEXT: vsetivli a0, 8, e8,m1,ta,mu
; LMULMAX1-NEXT: vslidedown.vi v27, v25, 8
; LMULMAX1-NEXT: vsetivli a0, 4, e32,m1,ta,mu
; LMULMAX1-NEXT: vsext.vf4 v30, v27
; LMULMAX1-NEXT: vsetivli a0, 4, e8,m1,ta,mu
; LMULMAX1-NEXT: vslidedown.vi v27, v27, 4
; LMULMAX1-NEXT: vsetivli a0, 4, e32,m1,ta,mu
; LMULMAX1-NEXT: vsext.vf4 v31, v27
; LMULMAX1-NEXT: vsetivli a0, 8, e8,m1,ta,mu
; LMULMAX1-NEXT: vslidedown.vi v27, v26, 8
; LMULMAX1-NEXT: vsetivli a0, 4, e32,m1,ta,mu
; LMULMAX1-NEXT: vsext.vf4 v8, v27
; LMULMAX1-NEXT: vsetivli a0, 4, e8,m1,ta,mu
; LMULMAX1-NEXT: vslidedown.vi v27, v27, 4
; LMULMAX1-NEXT: vsetivli a0, 4, e32,m1,ta,mu
; LMULMAX1-NEXT: vsext.vf4 v9, v27
; LMULMAX1-NEXT: vsext.vf4 v27, v25
; LMULMAX1-NEXT: vsetivli a0, 8, e8,m1,ta,mu
; LMULMAX1-NEXT: vslidedown.vi v25, v25, 8
; LMULMAX1-NEXT: vsetivli a0, 4, e32,m1,ta,mu
; LMULMAX1-NEXT: vsext.vf4 v30, v25
; LMULMAX1-NEXT: vsetivli a0, 4, e8,m1,ta,mu
; LMULMAX1-NEXT: vslidedown.vi v25, v25, 4
; LMULMAX1-NEXT: vsetivli a0, 4, e32,m1,ta,mu
; LMULMAX1-NEXT: vsext.vf4 v31, v25
; LMULMAX1-NEXT: vsext.vf4 v25, v26
; LMULMAX1-NEXT: vsetivli a0, 8, e8,m1,ta,mu
; LMULMAX1-NEXT: vslidedown.vi v26, v26, 8
; LMULMAX1-NEXT: vsetivli a0, 4, e32,m1,ta,mu
; LMULMAX1-NEXT: vsext.vf4 v8, v26
; LMULMAX1-NEXT: vsetivli a0, 4, e8,m1,ta,mu
; LMULMAX1-NEXT: vslidedown.vi v26, v26, 4
; LMULMAX1-NEXT: vsetivli a0, 4, e32,m1,ta,mu
; LMULMAX1-NEXT: vsext.vf4 v9, v26
; LMULMAX1-NEXT: addi a0, a1, 48
; LMULMAX1-NEXT: vse32.v v9, (a0)
; LMULMAX1-NEXT: addi a0, a1, 32

@ -91,14 +91,14 @@ define <4 x i16> @vrgather_shuffle_vv_v4i16(<4 x i16> %x, <4 x i16> %y) {
; CHECK-NEXT: vmv.v.i v26, 0
; CHECK-NEXT: vsetivli a0, 4, e16,m1,tu,mu
; CHECK-NEXT: vslideup.vi v26, v25, 3
; CHECK-NEXT: addi a0, zero, 8
; CHECK-NEXT: vsetivli a1, 1, e8,m1,ta,mu
; CHECK-NEXT: vmv.s.x v0, a0
; CHECK-NEXT: lui a0, %hi(.LCPI6_0)
; CHECK-NEXT: addi a0, a0, %lo(.LCPI6_0)
; CHECK-NEXT: vsetivli a1, 4, e16,m1,ta,mu
; CHECK-NEXT: vle16.v v27, (a0)
; CHECK-NEXT: vrgather.vv v25, v8, v27
; CHECK-NEXT: addi a0, zero, 8
; CHECK-NEXT: vsetivli a1, 1, e8,m1,ta,mu
; CHECK-NEXT: vmv.s.x v0, a0
; CHECK-NEXT: vsetivli a0, 4, e16,m1,tu,mu
; CHECK-NEXT: vrgather.vv v25, v9, v26, v0.t
; CHECK-NEXT: vmv1r.v v8, v25
@ -211,15 +211,15 @@ define <8 x i64> @vrgather_shuffle_vv_v8i64(<8 x i64> %x, <8 x i64> %y) {
; RV32-NEXT: vmerge.vim v26, v26, 2, v0
; RV32-NEXT: vsetivli a0, 8, e16,m1,tu,mu
; RV32-NEXT: vslideup.vi v26, v25, 7
; RV32-NEXT: addi a0, zero, 164
; RV32-NEXT: vsetivli a1, 1, e8,m1,ta,mu
; RV32-NEXT: vmv.s.x v0, a0
; RV32-NEXT: lui a0, %hi(.LCPI11_0)
; RV32-NEXT: addi a0, a0, %lo(.LCPI11_0)
; RV32-NEXT: vsetivli a1, 8, e16,m1,ta,mu
; RV32-NEXT: vle16.v v25, (a0)
; RV32-NEXT: vsetivli a0, 8, e64,m4,ta,mu
; RV32-NEXT: vrgatherei16.vv v28, v8, v25
; RV32-NEXT: addi a0, zero, 164
; RV32-NEXT: vsetivli a1, 1, e8,m1,ta,mu
; RV32-NEXT: vmv.s.x v0, a0
; RV32-NEXT: vsetivli a0, 8, e64,m4,tu,mu
; RV32-NEXT: vrgatherei16.vv v28, v12, v26, v0.t
; RV32-NEXT: vmv4r.v v8, v28
@ -238,14 +238,14 @@ define <8 x i64> @vrgather_shuffle_vv_v8i64(<8 x i64> %x, <8 x i64> %y) {
; RV64-NEXT: vmerge.vim v16, v16, 2, v0
; RV64-NEXT: vsetivli a0, 8, e64,m4,tu,mu
; RV64-NEXT: vslideup.vi v16, v28, 7
; RV64-NEXT: addi a0, zero, 164
; RV64-NEXT: vsetivli a1, 1, e8,m1,ta,mu
; RV64-NEXT: vmv.s.x v0, a0
; RV64-NEXT: lui a0, %hi(.LCPI11_0)
; RV64-NEXT: addi a0, a0, %lo(.LCPI11_0)
; RV64-NEXT: vsetivli a1, 8, e64,m4,ta,mu
; RV64-NEXT: vle64.v v20, (a0)
; RV64-NEXT: vrgather.vv v28, v8, v20
; RV64-NEXT: addi a0, zero, 164
; RV64-NEXT: vsetivli a1, 1, e8,m1,ta,mu
; RV64-NEXT: vmv.s.x v0, a0
; RV64-NEXT: vsetivli a0, 8, e64,m4,tu,mu
; RV64-NEXT: vrgather.vv v28, v12, v16, v0.t
; RV64-NEXT: vmv4r.v v8, v28
@ -267,9 +267,6 @@ define <8 x i64> @vrgather_shuffle_xv_v8i64(<8 x i64> %x) {
; RV32-NEXT: vslideup.vi v27, v26, 5
; RV32-NEXT: vsetivli a0, 7, e16,m1,tu,mu
; RV32-NEXT: vslideup.vi v27, v25, 6
; RV32-NEXT: addi a0, zero, 113
; RV32-NEXT: vsetivli a1, 1, e8,m1,ta,mu
; RV32-NEXT: vmv.s.x v0, a0
; RV32-NEXT: lui a0, %hi(.LCPI12_0)
; RV32-NEXT: addi a0, a0, %lo(.LCPI12_0)
; RV32-NEXT: vsetivli a1, 8, e16,m1,ta,mu
@ -278,6 +275,9 @@ define <8 x i64> @vrgather_shuffle_xv_v8i64(<8 x i64> %x) {
; RV32-NEXT: vmv.v.i v12, -1
; RV32-NEXT: vsetivli a0, 8, e64,m4,ta,mu
; RV32-NEXT: vrgatherei16.vv v28, v12, v25
; RV32-NEXT: addi a0, zero, 113
; RV32-NEXT: vsetivli a1, 1, e8,m1,ta,mu
; RV32-NEXT: vmv.s.x v0, a0
; RV32-NEXT: vsetivli a0, 8, e64,m4,tu,mu
; RV32-NEXT: vrgatherei16.vv v28, v8, v27, v0.t
; RV32-NEXT: vmv4r.v v8, v28
@ -311,24 +311,24 @@ define <8 x i64> @vrgather_shuffle_xv_v8i64(<8 x i64> %x) {
define <8 x i64> @vrgather_shuffle_vx_v8i64(<8 x i64> %x) {
; RV32-LABEL: vrgather_shuffle_vx_v8i64:
; RV32: # %bb.0:
; RV32-NEXT: addi a0, zero, 140
; RV32-NEXT: vsetivli a1, 1, e8,m1,ta,mu
; RV32-NEXT: vmv.s.x v0, a0
; RV32-NEXT: lui a0, %hi(.LCPI13_0)
; RV32-NEXT: addi a0, a0, %lo(.LCPI13_0)
; RV32-NEXT: vsetivli a1, 8, e16,m1,ta,mu
; RV32-NEXT: vle16.v v25, (a0)
; RV32-NEXT: vmv4r.v v28, v8
; RV32-NEXT: vsetivli a0, 8, e64,m4,ta,mu
; RV32-NEXT: vrgatherei16.vv v28, v8, v25
; RV32-NEXT: vrgatherei16.vv v8, v28, v25
; RV32-NEXT: addi a0, zero, 140
; RV32-NEXT: vsetivli a1, 1, e8,m1,ta,mu
; RV32-NEXT: vmv.s.x v0, a0
; RV32-NEXT: lui a0, %hi(.LCPI13_1)
; RV32-NEXT: addi a0, a0, %lo(.LCPI13_1)
; RV32-NEXT: vsetivli a1, 8, e16,m1,ta,mu
; RV32-NEXT: vle16.v v25, (a0)
; RV32-NEXT: vsetivli a0, 8, e64,m4,ta,mu
; RV32-NEXT: vmv.v.i v8, 5
; RV32-NEXT: vmv.v.i v28, 5
; RV32-NEXT: vsetivli a0, 8, e64,m4,tu,mu
; RV32-NEXT: vrgatherei16.vv v28, v8, v25, v0.t
; RV32-NEXT: vmv4r.v v8, v28
; RV32-NEXT: vrgatherei16.vv v8, v28, v25, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vrgather_shuffle_vx_v8i64:

@ -366,8 +366,8 @@ define void @splat_zero_v32i8(<32 x i8>* %x) {
; LMULMAX1: # %bb.0:
; LMULMAX1-NEXT: vsetivli a1, 16, e8,m1,ta,mu
; LMULMAX1-NEXT: vmv.v.i v25, 0
; LMULMAX1-NEXT: addi a1, a0, 16
; LMULMAX1-NEXT: vse8.v v25, (a1)
; LMULMAX1-NEXT: vse8.v v25, (a0)
; LMULMAX1-NEXT: addi a0, a0, 16
; LMULMAX1-NEXT: vse8.v v25, (a0)
; LMULMAX1-NEXT: ret
%a = insertelement <32 x i8> undef, i8 0, i32 0
@ -395,8 +395,8 @@ define void @splat_zero_v16i16(<16 x i16>* %x) {
; LMULMAX1: # %bb.0:
; LMULMAX1-NEXT: vsetivli a1, 8, e16,m1,ta,mu
; LMULMAX1-NEXT: vmv.v.i v25, 0
; LMULMAX1-NEXT: addi a1, a0, 16
; LMULMAX1-NEXT: vse16.v v25, (a1)
; LMULMAX1-NEXT: vse16.v v25, (a0)
; LMULMAX1-NEXT: addi a0, a0, 16
; LMULMAX1-NEXT: vse16.v v25, (a0)
; LMULMAX1-NEXT: ret
%a = insertelement <16 x i16> undef, i16 0, i32 0
@ -424,8 +424,8 @@ define void @splat_zero_v8i32(<8 x i32>* %x) {
; LMULMAX1: # %bb.0:
; LMULMAX1-NEXT: vsetivli a1, 4, e32,m1,ta,mu
; LMULMAX1-NEXT: vmv.v.i v25, 0
; LMULMAX1-NEXT: addi a1, a0, 16
; LMULMAX1-NEXT: vse32.v v25, (a1)
; LMULMAX1-NEXT: vse32.v v25, (a0)
; LMULMAX1-NEXT: addi a0, a0, 16
; LMULMAX1-NEXT: vse32.v v25, (a0)
; LMULMAX1-NEXT: ret
%a = insertelement <8 x i32> undef, i32 0, i32 0
@ -453,8 +453,8 @@ define void @splat_zero_v4i64(<4 x i64>* %x) {
; LMULMAX1-RV32: # %bb.0:
; LMULMAX1-RV32-NEXT: vsetivli a1, 4, e32,m1,ta,mu
; LMULMAX1-RV32-NEXT: vmv.v.i v25, 0
; LMULMAX1-RV32-NEXT: addi a1, a0, 16
; LMULMAX1-RV32-NEXT: vse32.v v25, (a1)
; LMULMAX1-RV32-NEXT: vse32.v v25, (a0)
; LMULMAX1-RV32-NEXT: addi a0, a0, 16
; LMULMAX1-RV32-NEXT: vse32.v v25, (a0)
; LMULMAX1-RV32-NEXT: ret
;
@ -476,8 +476,8 @@ define void @splat_zero_v4i64(<4 x i64>* %x) {
; LMULMAX1-RV64: # %bb.0:
; LMULMAX1-RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu
; LMULMAX1-RV64-NEXT: vmv.v.i v25, 0
; LMULMAX1-RV64-NEXT: addi a1, a0, 16
; LMULMAX1-RV64-NEXT: vse64.v v25, (a1)
; LMULMAX1-RV64-NEXT: vse64.v v25, (a0)
; LMULMAX1-RV64-NEXT: addi a0, a0, 16
; LMULMAX1-RV64-NEXT: vse64.v v25, (a0)
; LMULMAX1-RV64-NEXT: ret
%a = insertelement <4 x i64> undef, i64 0, i32 0
@ -594,8 +594,8 @@ define void @splat_allones_v32i8(<32 x i8>* %x) {
; LMULMAX1: # %bb.0:
; LMULMAX1-NEXT: vsetivli a1, 16, e8,m1,ta,mu
; LMULMAX1-NEXT: vmv.v.i v25, -1
; LMULMAX1-NEXT: addi a1, a0, 16
; LMULMAX1-NEXT: vse8.v v25, (a1)
; LMULMAX1-NEXT: vse8.v v25, (a0)
; LMULMAX1-NEXT: addi a0, a0, 16
; LMULMAX1-NEXT: vse8.v v25, (a0)
; LMULMAX1-NEXT: ret
%a = insertelement <32 x i8> undef, i8 -1, i32 0
@ -623,8 +623,8 @@ define void @splat_allones_v16i16(<16 x i16>* %x) {
; LMULMAX1: # %bb.0:
; LMULMAX1-NEXT: vsetivli a1, 8, e16,m1,ta,mu
; LMULMAX1-NEXT: vmv.v.i v25, -1
; LMULMAX1-NEXT: addi a1, a0, 16
; LMULMAX1-NEXT: vse16.v v25, (a1)
; LMULMAX1-NEXT: vse16.v v25, (a0)
; LMULMAX1-NEXT: addi a0, a0, 16
; LMULMAX1-NEXT: vse16.v v25, (a0)
; LMULMAX1-NEXT: ret
%a = insertelement <16 x i16> undef, i16 -1, i32 0
@ -652,8 +652,8 @@ define void @splat_allones_v8i32(<8 x i32>* %x) {
; LMULMAX1: # %bb.0:
; LMULMAX1-NEXT: vsetivli a1, 4, e32,m1,ta,mu
; LMULMAX1-NEXT: vmv.v.i v25, -1
; LMULMAX1-NEXT: addi a1, a0, 16
; LMULMAX1-NEXT: vse32.v v25, (a1)
; LMULMAX1-NEXT: vse32.v v25, (a0)
; LMULMAX1-NEXT: addi a0, a0, 16
; LMULMAX1-NEXT: vse32.v v25, (a0)
; LMULMAX1-NEXT: ret
%a = insertelement <8 x i32> undef, i32 -1, i32 0
@ -681,8 +681,8 @@ define void @splat_allones_v4i64(<4 x i64>* %x) {
; LMULMAX1-RV32: # %bb.0:
; LMULMAX1-RV32-NEXT: vsetivli a1, 4, e32,m1,ta,mu
; LMULMAX1-RV32-NEXT: vmv.v.i v25, -1
; LMULMAX1-RV32-NEXT: addi a1, a0, 16
; LMULMAX1-RV32-NEXT: vse32.v v25, (a1)
; LMULMAX1-RV32-NEXT: vse32.v v25, (a0)
; LMULMAX1-RV32-NEXT: addi a0, a0, 16
; LMULMAX1-RV32-NEXT: vse32.v v25, (a0)
; LMULMAX1-RV32-NEXT: ret
;
@ -704,8 +704,8 @@ define void @splat_allones_v4i64(<4 x i64>* %x) {
; LMULMAX1-RV64: # %bb.0:
; LMULMAX1-RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu
; LMULMAX1-RV64-NEXT: vmv.v.i v25, -1
; LMULMAX1-RV64-NEXT: addi a1, a0, 16
; LMULMAX1-RV64-NEXT: vse64.v v25, (a1)
; LMULMAX1-RV64-NEXT: vse64.v v25, (a0)
; LMULMAX1-RV64-NEXT: addi a0, a0, 16
; LMULMAX1-RV64-NEXT: vse64.v v25, (a0)
; LMULMAX1-RV64-NEXT: ret
%a = insertelement <4 x i64> undef, i64 -1, i32 0
@ -798,13 +798,14 @@ define void @vadd_vx_v16i64(<16 x i64>* %a, i64 %b, <16 x i64>* %c) {
; LMULMAX8-RV32: # %bb.0:
; LMULMAX8-RV32-NEXT: vsetivli a4, 16, e64,m8,ta,mu
; LMULMAX8-RV32-NEXT: vle64.v v8, (a0)
; LMULMAX8-RV32-NEXT: lui a0, 349525
; LMULMAX8-RV32-NEXT: addi a0, a0, 1365
; LMULMAX8-RV32-NEXT: vsetivli a4, 1, e32,m1,ta,mu
; LMULMAX8-RV32-NEXT: vmv.s.x v0, a0
; LMULMAX8-RV32-NEXT: addi a0, zero, 32
; LMULMAX8-RV32-NEXT: vsetvli a0, a0, e32,m8,ta,mu
; LMULMAX8-RV32-NEXT: vsetvli a4, a0, e32,m8,ta,mu
; LMULMAX8-RV32-NEXT: vmv.v.x v16, a2
; LMULMAX8-RV32-NEXT: lui a2, 349525
; LMULMAX8-RV32-NEXT: addi a2, a2, 1365
; LMULMAX8-RV32-NEXT: vsetivli a4, 1, e32,m1,ta,mu
; LMULMAX8-RV32-NEXT: vmv.s.x v0, a2
; LMULMAX8-RV32-NEXT: vsetvli a0, a0, e32,m8,ta,mu
; LMULMAX8-RV32-NEXT: vmerge.vxm v16, v16, a1, v0
; LMULMAX8-RV32-NEXT: vsetivli a0, 16, e64,m8,ta,mu
; LMULMAX8-RV32-NEXT: vadd.vv v8, v8, v16

@ -1220,18 +1220,18 @@ define void @mulhs_v2i64(<2 x i64>* %x) {
; RV32-NEXT: vmv.s.x v26, a1
; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
; RV32-NEXT: vmulh.vv v26, v25, v26
; RV32-NEXT: addi a1, zero, 1
; RV32-NEXT: addi a2, zero, 3
; RV32-NEXT: vsetivli a3, 1, e8,m1,ta,mu
; RV32-NEXT: vmv.s.x v0, a2
; RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu
; RV32-NEXT: addi a1, zero, 3
; RV32-NEXT: vsetivli a2, 1, e8,m1,ta,mu
; RV32-NEXT: vmv.s.x v0, a1
; RV32-NEXT: vsetivli a1, 4, e32,m1,ta,mu
; RV32-NEXT: vmv.v.i v27, -1
; RV32-NEXT: vmerge.vim v27, v27, 0, v0
; RV32-NEXT: vsetivli a2, 2, e64,m1,ta,mu
; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
; RV32-NEXT: vmul.vv v25, v25, v27
; RV32-NEXT: vadd.vv v25, v26, v25
; RV32-NEXT: addi a2, zero, 63
; RV32-NEXT: vsrl.vx v26, v25, a2
; RV32-NEXT: addi a1, zero, 63
; RV32-NEXT: vsrl.vx v26, v25, a1
; RV32-NEXT: addi a1, zero, 1
; RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu
; RV32-NEXT: vmv.s.x v27, a1
; RV32-NEXT: vmv.v.i v28, 0
@ -3994,6 +3994,28 @@ define void @mulhu_v32i8(<32 x i8>* %x) {
; LMULMAX2-RV32-NEXT: addi a1, zero, 32
; LMULMAX2-RV32-NEXT: vsetvli a2, a1, e8,m2,ta,mu
; LMULMAX2-RV32-NEXT: vle8.v v26, (a0)
; LMULMAX2-RV32-NEXT: lui a2, 66049
; LMULMAX2-RV32-NEXT: addi a2, a2, 32
; LMULMAX2-RV32-NEXT: vsetivli a3, 1, e32,m1,ta,mu
; LMULMAX2-RV32-NEXT: vmv.s.x v0, a2
; LMULMAX2-RV32-NEXT: vsetvli a2, a1, e8,m2,ta,mu
; LMULMAX2-RV32-NEXT: lui a2, %hi(.LCPI129_0)
; LMULMAX2-RV32-NEXT: addi a2, a2, %lo(.LCPI129_0)
; LMULMAX2-RV32-NEXT: vle8.v v28, (a2)
; LMULMAX2-RV32-NEXT: vmv.v.i v30, 0
; LMULMAX2-RV32-NEXT: vmerge.vim v8, v30, 1, v0
; LMULMAX2-RV32-NEXT: vsrl.vv v8, v26, v8
; LMULMAX2-RV32-NEXT: vmulhu.vv v28, v8, v28
; LMULMAX2-RV32-NEXT: vsub.vv v26, v26, v28
; LMULMAX2-RV32-NEXT: lui a2, 163907
; LMULMAX2-RV32-NEXT: addi a2, a2, -2044
; LMULMAX2-RV32-NEXT: vsetivli a3, 1, e32,m1,ta,mu
; LMULMAX2-RV32-NEXT: vmv.s.x v0, a2
; LMULMAX2-RV32-NEXT: addi a2, zero, -128
; LMULMAX2-RV32-NEXT: vsetvli a3, a1, e8,m2,ta,mu
; LMULMAX2-RV32-NEXT: vmerge.vxm v30, v30, a2, v0
; LMULMAX2-RV32-NEXT: vmulhu.vv v26, v26, v30
; LMULMAX2-RV32-NEXT: vadd.vv v26, v26, v28
; LMULMAX2-RV32-NEXT: lui a2, 8208
; LMULMAX2-RV32-NEXT: addi a2, a2, 513
; LMULMAX2-RV32-NEXT: vsetivli a3, 1, e32,m1,ta,mu
@ -4011,30 +4033,8 @@ define void @mulhu_v32i8(<32 x i8>* %x) {
; LMULMAX2-RV32-NEXT: addi a2, a2, 304
; LMULMAX2-RV32-NEXT: vsetivli a3, 1, e32,m1,ta,mu
; LMULMAX2-RV32-NEXT: vmv.s.x v0, a2
; LMULMAX2-RV32-NEXT: vsetvli a2, a1, e8,m2,ta,mu
; LMULMAX2-RV32-NEXT: vmerge.vim v28, v28, 2, v0
; LMULMAX2-RV32-NEXT: lui a2, 163907
; LMULMAX2-RV32-NEXT: addi a2, a2, -2044
; LMULMAX2-RV32-NEXT: vsetivli a3, 1, e32,m1,ta,mu
; LMULMAX2-RV32-NEXT: vmv.s.x v0, a2
; LMULMAX2-RV32-NEXT: vsetvli a2, a1, e8,m2,ta,mu
; LMULMAX2-RV32-NEXT: vmv.v.i v30, 0
; LMULMAX2-RV32-NEXT: addi a2, zero, -128
; LMULMAX2-RV32-NEXT: vmerge.vxm v8, v30, a2, v0
; LMULMAX2-RV32-NEXT: lui a2, 66049
; LMULMAX2-RV32-NEXT: addi a2, a2, 32
; LMULMAX2-RV32-NEXT: vsetivli a3, 1, e32,m1,ta,mu
; LMULMAX2-RV32-NEXT: vmv.s.x v0, a2
; LMULMAX2-RV32-NEXT: vsetvli a1, a1, e8,m2,ta,mu
; LMULMAX2-RV32-NEXT: lui a1, %hi(.LCPI129_0)
; LMULMAX2-RV32-NEXT: addi a1, a1, %lo(.LCPI129_0)
; LMULMAX2-RV32-NEXT: vle8.v v10, (a1)
; LMULMAX2-RV32-NEXT: vmerge.vim v30, v30, 1, v0
; LMULMAX2-RV32-NEXT: vsrl.vv v30, v26, v30
; LMULMAX2-RV32-NEXT: vmulhu.vv v30, v30, v10
; LMULMAX2-RV32-NEXT: vsub.vv v26, v26, v30
; LMULMAX2-RV32-NEXT: vmulhu.vv v26, v26, v8
; LMULMAX2-RV32-NEXT: vadd.vv v26, v26, v30
; LMULMAX2-RV32-NEXT: vmerge.vim v28, v28, 2, v0
; LMULMAX2-RV32-NEXT: vsrl.vv v26, v26, v28
; LMULMAX2-RV32-NEXT: vse8.v v26, (a0)
; LMULMAX2-RV32-NEXT: ret
@ -4044,6 +4044,28 @@ define void @mulhu_v32i8(<32 x i8>* %x) {
; LMULMAX2-RV64-NEXT: addi a1, zero, 32
; LMULMAX2-RV64-NEXT: vsetvli a2, a1, e8,m2,ta,mu
; LMULMAX2-RV64-NEXT: vle8.v v26, (a0)
; LMULMAX2-RV64-NEXT: lui a2, 66049
; LMULMAX2-RV64-NEXT: addiw a2, a2, 32
; LMULMAX2-RV64-NEXT: vsetivli a3, 1, e32,m1,ta,mu
; LMULMAX2-RV64-NEXT: vmv.s.x v0, a2
; LMULMAX2-RV64-NEXT: vsetvli a2, a1, e8,m2,ta,mu
; LMULMAX2-RV64-NEXT: lui a2, %hi(.LCPI129_0)
; LMULMAX2-RV64-NEXT: addi a2, a2, %lo(.LCPI129_0)
; LMULMAX2-RV64-NEXT: vle8.v v28, (a2)
; LMULMAX2-RV64-NEXT: vmv.v.i v30, 0
; LMULMAX2-RV64-NEXT: vmerge.vim v8, v30, 1, v0
; LMULMAX2-RV64-NEXT: vsrl.vv v8, v26, v8
; LMULMAX2-RV64-NEXT: vmulhu.vv v28, v8, v28
; LMULMAX2-RV64-NEXT: vsub.vv v26, v26, v28
; LMULMAX2-RV64-NEXT: lui a2, 163907
; LMULMAX2-RV64-NEXT: addiw a2, a2, -2044
; LMULMAX2-RV64-NEXT: vsetivli a3, 1, e32,m1,ta,mu
; LMULMAX2-RV64-NEXT: vmv.s.x v0, a2
; LMULMAX2-RV64-NEXT: addi a2, zero, -128
; LMULMAX2-RV64-NEXT: vsetvli a3, a1, e8,m2,ta,mu
; LMULMAX2-RV64-NEXT: vmerge.vxm v30, v30, a2, v0
; LMULMAX2-RV64-NEXT: vmulhu.vv v26, v26, v30
; LMULMAX2-RV64-NEXT: vadd.vv v26, v26, v28
; LMULMAX2-RV64-NEXT: lui a2, 8208
; LMULMAX2-RV64-NEXT: addiw a2, a2, 513
; LMULMAX2-RV64-NEXT: vsetivli a3, 1, e32,m1,ta,mu
@ -4061,30 +4083,8 @@ define void @mulhu_v32i8(<32 x i8>* %x) {
; LMULMAX2-RV64-NEXT: addiw a2, a2, 304
; LMULMAX2-RV64-NEXT: vsetivli a3, 1, e32,m1,ta,mu
; LMULMAX2-RV64-NEXT: vmv.s.x v0, a2
; LMULMAX2-RV64-NEXT: vsetvli a2, a1, e8,m2,ta,mu
; LMULMAX2-RV64-NEXT: vmerge.vim v28, v28, 2, v0
; LMULMAX2-RV64-NEXT: lui a2, 163907
; LMULMAX2-RV64-NEXT: addiw a2, a2, -2044
; LMULMAX2-RV64-NEXT: vsetivli a3, 1, e32,m1,ta,mu
; LMULMAX2-RV64-NEXT: vmv.s.x v0, a2
; LMULMAX2-RV64-NEXT: vsetvli a2, a1, e8,m2,ta,mu
; LMULMAX2-RV64-NEXT: vmv.v.i v30, 0
; LMULMAX2-RV64-NEXT: addi a2, zero, -128
; LMULMAX2-RV64-NEXT: vmerge.vxm v8, v30, a2, v0
; LMULMAX2-RV64-NEXT: lui a2, 66049
; LMULMAX2-RV64-NEXT: addiw a2, a2, 32
; LMULMAX2-RV64-NEXT: vsetivli a3, 1, e32,m1,ta,mu
; LMULMAX2-RV64-NEXT: vmv.s.x v0, a2
; LMULMAX2-RV64-NEXT: vsetvli a1, a1, e8,m2,ta,mu
; LMULMAX2-RV64-NEXT: lui a1, %hi(.LCPI129_0)
; LMULMAX2-RV64-NEXT: addi a1, a1, %lo(.LCPI129_0)
; LMULMAX2-RV64-NEXT: vle8.v v10, (a1)
; LMULMAX2-RV64-NEXT: vmerge.vim v30, v30, 1, v0
; LMULMAX2-RV64-NEXT: vsrl.vv v30, v26, v30
; LMULMAX2-RV64-NEXT: vmulhu.vv v30, v30, v10
; LMULMAX2-RV64-NEXT: vsub.vv v26, v26, v30
; LMULMAX2-RV64-NEXT: vmulhu.vv v26, v26, v8
; LMULMAX2-RV64-NEXT: vadd.vv v26, v26, v30
; LMULMAX2-RV64-NEXT: vmerge.vim v28, v28, 2, v0
; LMULMAX2-RV64-NEXT: vsrl.vv v26, v26, v28
; LMULMAX2-RV64-NEXT: vse8.v v26, (a0)
; LMULMAX2-RV64-NEXT: ret
@ -4371,65 +4371,65 @@ define void @mulhu_v4i64(<4 x i64>* %x) {
;
; LMULMAX1-RV64-LABEL: mulhu_v4i64:
; LMULMAX1-RV64: # %bb.0:
; LMULMAX1-RV64-NEXT: addi a1, zero, 2
; LMULMAX1-RV64-NEXT: vsetivli a2, 2, e64,m1,ta,mu
; LMULMAX1-RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu
; LMULMAX1-RV64-NEXT: vle64.v v25, (a0)
; LMULMAX1-RV64-NEXT: addi a2, a0, 16
; LMULMAX1-RV64-NEXT: vle64.v v26, (a2)
; LMULMAX1-RV64-NEXT: addi a1, a0, 16
; LMULMAX1-RV64-NEXT: vle64.v v26, (a1)
; LMULMAX1-RV64-NEXT: vmv.v.i v27, 0
; LMULMAX1-RV64-NEXT: addi a3, zero, -1
; LMULMAX1-RV64-NEXT: slli a3, a3, 63
; LMULMAX1-RV64-NEXT: vmv.s.x v27, a3
; LMULMAX1-RV64-NEXT: lui a3, 1044935
; LMULMAX1-RV64-NEXT: addiw a3, a3, 455
; LMULMAX1-RV64-NEXT: slli a3, a3, 12
; LMULMAX1-RV64-NEXT: addi a3, a3, 455
; LMULMAX1-RV64-NEXT: slli a3, a3, 12
; LMULMAX1-RV64-NEXT: addi a3, a3, 455
; LMULMAX1-RV64-NEXT: slli a3, a3, 13
; LMULMAX1-RV64-NEXT: addi a3, a3, 911
; LMULMAX1-RV64-NEXT: vmv.v.x v28, a3
; LMULMAX1-RV64-NEXT: lui a3, 4681
; LMULMAX1-RV64-NEXT: addiw a3, a3, 585
; LMULMAX1-RV64-NEXT: slli a3, a3, 12
; LMULMAX1-RV64-NEXT: addi a3, a3, 585
; LMULMAX1-RV64-NEXT: slli a3, a3, 12
; LMULMAX1-RV64-NEXT: addi a3, a3, 585
; LMULMAX1-RV64-NEXT: slli a3, a3, 13
; LMULMAX1-RV64-NEXT: addi a3, a3, 1171
; LMULMAX1-RV64-NEXT: vmv.s.x v28, a3
; LMULMAX1-RV64-NEXT: addi a2, zero, -1
; LMULMAX1-RV64-NEXT: slli a2, a2, 63
; LMULMAX1-RV64-NEXT: vmv.s.x v27, a2
; LMULMAX1-RV64-NEXT: lui a2, 1044935
; LMULMAX1-RV64-NEXT: addiw a2, a2, 455
; LMULMAX1-RV64-NEXT: slli a2, a2, 12
; LMULMAX1-RV64-NEXT: addi a2, a2, 455
; LMULMAX1-RV64-NEXT: slli a2, a2, 12
; LMULMAX1-RV64-NEXT: addi a2, a2, 455
; LMULMAX1-RV64-NEXT: slli a2, a2, 13
; LMULMAX1-RV64-NEXT: addi a2, a2, 911
; LMULMAX1-RV64-NEXT: vmv.v.x v28, a2
; LMULMAX1-RV64-NEXT: lui a2, 4681
; LMULMAX1-RV64-NEXT: addiw a2, a2, 585
; LMULMAX1-RV64-NEXT: slli a2, a2, 12
; LMULMAX1-RV64-NEXT: addi a2, a2, 585
; LMULMAX1-RV64-NEXT: slli a2, a2, 12
; LMULMAX1-RV64-NEXT: addi a2, a2, 585
; LMULMAX1-RV64-NEXT: slli a2, a2, 13
; LMULMAX1-RV64-NEXT: addi a2, a2, 1171
; LMULMAX1-RV64-NEXT: vmv.s.x v28, a2
; LMULMAX1-RV64-NEXT: vmulhu.vv v28, v26, v28
; LMULMAX1-RV64-NEXT: vsub.vv v26, v26, v28
; LMULMAX1-RV64-NEXT: vmulhu.vv v26, v26, v27
; LMULMAX1-RV64-NEXT: vadd.vv v26, v26, v28
; LMULMAX1-RV64-NEXT: vmv.v.i v27, 3
; LMULMAX1-RV64-NEXT: vmv.s.x v27, a1
; LMULMAX1-RV64-NEXT: addi a2, zero, 2
; LMULMAX1-RV64-NEXT: vmv.s.x v27, a2
; LMULMAX1-RV64-NEXT: vsrl.vv v26, v26, v27
; LMULMAX1-RV64-NEXT: vmv.v.i v27, 2
; LMULMAX1-RV64-NEXT: addi a1, zero, 1
; LMULMAX1-RV64-NEXT: vmv.s.x v27, a1
; LMULMAX1-RV64-NEXT: lui a1, 1035469
; LMULMAX1-RV64-NEXT: addiw a1, a1, -819
; LMULMAX1-RV64-NEXT: slli a1, a1, 12
; LMULMAX1-RV64-NEXT: addi a1, a1, -819
; LMULMAX1-RV64-NEXT: slli a1, a1, 12
; LMULMAX1-RV64-NEXT: addi a1, a1, -819
; LMULMAX1-RV64-NEXT: slli a1, a1, 12
; LMULMAX1-RV64-NEXT: addi a1, a1, -819
; LMULMAX1-RV64-NEXT: vmv.v.x v28, a1
; LMULMAX1-RV64-NEXT: lui a1, 1026731
; LMULMAX1-RV64-NEXT: addiw a1, a1, -1365
; LMULMAX1-RV64-NEXT: slli a1, a1, 12
; LMULMAX1-RV64-NEXT: addi a1, a1, -1365
; LMULMAX1-RV64-NEXT: slli a1, a1, 12
; LMULMAX1-RV64-NEXT: addi a1, a1, -1365
; LMULMAX1-RV64-NEXT: slli a1, a1, 12
; LMULMAX1-RV64-NEXT: addi a1, a1, -1365
; LMULMAX1-RV64-NEXT: vmv.s.x v28, a1
; LMULMAX1-RV64-NEXT: addi a2, zero, 1
; LMULMAX1-RV64-NEXT: vmv.s.x v27, a2
; LMULMAX1-RV64-NEXT: lui a2, 1035469
; LMULMAX1-RV64-NEXT: addiw a2, a2, -819
; LMULMAX1-RV64-NEXT: slli a2, a2, 12
; LMULMAX1-RV64-NEXT: addi a2, a2, -819
; LMULMAX1-RV64-NEXT: slli a2, a2, 12
; LMULMAX1-RV64-NEXT: addi a2, a2, -819
; LMULMAX1-RV64-NEXT: slli a2, a2, 12
; LMULMAX1-RV64-NEXT: addi a2, a2, -819
; LMULMAX1-RV64-NEXT: vmv.v.x v28, a2
; LMULMAX1-RV64-NEXT: lui a2, 1026731
; LMULMAX1-RV64-NEXT: addiw a2, a2, -1365
; LMULMAX1-RV64-NEXT: slli a2, a2, 12
; LMULMAX1-RV64-NEXT: addi a2, a2, -1365
; LMULMAX1-RV64-NEXT: slli a2, a2, 12
; LMULMAX1-RV64-NEXT: addi a2, a2, -1365
; LMULMAX1-RV64-NEXT: slli a2, a2, 12
; LMULMAX1-RV64-NEXT: addi a2, a2, -1365
; LMULMAX1-RV64-NEXT: vmv.s.x v28, a2
; LMULMAX1-RV64-NEXT: vmulhu.vv v25, v25, v28
; LMULMAX1-RV64-NEXT: vsrl.vv v25, v25, v27
; LMULMAX1-RV64-NEXT: vse64.v v25, (a0)
; LMULMAX1-RV64-NEXT: vse64.v v26, (a2)
; LMULMAX1-RV64-NEXT: vse64.v v26, (a1)
; LMULMAX1-RV64-NEXT: ret
%a = load <4 x i64>, <4 x i64>* %x
%b = udiv <4 x i64> %a, <i64 3, i64 5, i64 7, i64 9>
@ -4443,18 +4443,18 @@ define void @mulhs_v32i8(<32 x i8>* %x) {
; LMULMAX2-RV32-NEXT: addi a1, zero, 32
; LMULMAX2-RV32-NEXT: vsetvli a2, a1, e8,m2,ta,mu
; LMULMAX2-RV32-NEXT: vle8.v v26, (a0)
; LMULMAX2-RV32-NEXT: addi a2, zero, -123
; LMULMAX2-RV32-NEXT: vmv.v.x v28, a2
; LMULMAX2-RV32-NEXT: lui a2, 304453
; LMULMAX2-RV32-NEXT: addi a2, a2, -1452
; LMULMAX2-RV32-NEXT: vsetivli a3, 1, e32,m1,ta,mu
; LMULMAX2-RV32-NEXT: vmv.s.x v0, a2
; LMULMAX2-RV32-NEXT: addi a2, zero, 57
; LMULMAX2-RV32-NEXT: vsetvli a1, a1, e8,m2,ta,mu
; LMULMAX2-RV32-NEXT: vmerge.vxm v28, v28, a2, v0
; LMULMAX2-RV32-NEXT: vmulhu.vv v26, v26, v28
; LMULMAX2-RV32-NEXT: vmv.v.i v28, 7
; LMULMAX2-RV32-NEXT: vmerge.vim v28, v28, 1, v0
; LMULMAX2-RV32-NEXT: addi a1, zero, -123
; LMULMAX2-RV32-NEXT: vmv.v.x v30, a1
; LMULMAX2-RV32-NEXT: addi a1, zero, 57
; LMULMAX2-RV32-NEXT: vmerge.vxm v30, v30, a1, v0
; LMULMAX2-RV32-NEXT: vmulhu.vv v26, v26, v30
; LMULMAX2-RV32-NEXT: vsrl.vv v26, v26, v28
; LMULMAX2-RV32-NEXT: vse8.v v26, (a0)
; LMULMAX2-RV32-NEXT: ret
@ -4464,18 +4464,18 @@ define void @mulhs_v32i8(<32 x i8>* %x) {
; LMULMAX2-RV64-NEXT: addi a1, zero, 32
; LMULMAX2-RV64-NEXT: vsetvli a2, a1, e8,m2,ta,mu
; LMULMAX2-RV64-NEXT: vle8.v v26, (a0)
; LMULMAX2-RV64-NEXT: addi a2, zero, -123
; LMULMAX2-RV64-NEXT: vmv.v.x v28, a2
; LMULMAX2-RV64-NEXT: lui a2, 304453
; LMULMAX2-RV64-NEXT: addiw a2, a2, -1452
; LMULMAX2-RV64-NEXT: vsetivli a3, 1, e32,m1,ta,mu
; LMULMAX2-RV64-NEXT: vmv.s.x v0, a2
; LMULMAX2-RV64-NEXT: addi a2, zero, 57
; LMULMAX2-RV64-NEXT: vsetvli a1, a1, e8,m2,ta,mu
; LMULMAX2-RV64-NEXT: vmerge.vxm v28, v28, a2, v0
; LMULMAX2-RV64-NEXT: vmulhu.vv v26, v26, v28
; LMULMAX2-RV64-NEXT: vmv.v.i v28, 7
; LMULMAX2-RV64-NEXT: vmerge.vim v28, v28, 1, v0
; LMULMAX2-RV64-NEXT: addi a1, zero, -123
; LMULMAX2-RV64-NEXT: vmv.v.x v30, a1
; LMULMAX2-RV64-NEXT: addi a1, zero, 57
; LMULMAX2-RV64-NEXT: vmerge.vxm v30, v30, a1, v0
; LMULMAX2-RV64-NEXT: vmulhu.vv v26, v26, v30
; LMULMAX2-RV64-NEXT: vsrl.vv v26, v26, v28
; LMULMAX2-RV64-NEXT: vse8.v v26, (a0)
; LMULMAX2-RV64-NEXT: ret

@ -94,19 +94,19 @@ define void @splat_zeros_v32i1(<32 x i1>* %x) {
;
; LMULMAX1-RV32-LABEL: splat_zeros_v32i1:
; LMULMAX1-RV32: # %bb.0:
; LMULMAX1-RV32-NEXT: addi a1, a0, 2
; LMULMAX1-RV32-NEXT: vsetivli a2, 16, e8,m1,ta,mu
; LMULMAX1-RV32-NEXT: vsetivli a1, 16, e8,m1,ta,mu
; LMULMAX1-RV32-NEXT: vmclr.m v25
; LMULMAX1-RV32-NEXT: vse1.v v25, (a1)
; LMULMAX1-RV32-NEXT: vse1.v v25, (a0)
; LMULMAX1-RV32-NEXT: addi a0, a0, 2
; LMULMAX1-RV32-NEXT: vse1.v v25, (a0)
; LMULMAX1-RV32-NEXT: ret
;
; LMULMAX1-RV64-LABEL: splat_zeros_v32i1:
; LMULMAX1-RV64: # %bb.0:
; LMULMAX1-RV64-NEXT: addi a1, a0, 2
; LMULMAX1-RV64-NEXT: vsetivli a2, 16, e8,m1,ta,mu
; LMULMAX1-RV64-NEXT: vsetivli a1, 16, e8,m1,ta,mu
; LMULMAX1-RV64-NEXT: vmclr.m v25
; LMULMAX1-RV64-NEXT: vse1.v v25, (a1)
; LMULMAX1-RV64-NEXT: vse1.v v25, (a0)
; LMULMAX1-RV64-NEXT: addi a0, a0, 2
; LMULMAX1-RV64-NEXT: vse1.v v25, (a0)
; LMULMAX1-RV64-NEXT: ret
store <32 x i1> zeroinitializer, <32 x i1>* %x
@ -126,27 +126,27 @@ define void @splat_ones_v64i1(<64 x i1>* %x) {
;
; LMULMAX1-RV32-LABEL: splat_ones_v64i1:
; LMULMAX1-RV32: # %bb.0:
; LMULMAX1-RV32-NEXT: addi a1, a0, 6
; LMULMAX1-RV32-NEXT: vsetivli a2, 16, e8,m1,ta,mu
; LMULMAX1-RV32-NEXT: vsetivli a1, 16, e8,m1,ta,mu
; LMULMAX1-RV32-NEXT: vmset.m v25
; LMULMAX1-RV32-NEXT: vse1.v v25, (a0)
; LMULMAX1-RV32-NEXT: addi a1, a0, 6
; LMULMAX1-RV32-NEXT: vse1.v v25, (a1)
; LMULMAX1-RV32-NEXT: addi a1, a0, 4
; LMULMAX1-RV32-NEXT: vse1.v v25, (a1)
; LMULMAX1-RV32-NEXT: addi a1, a0, 2
; LMULMAX1-RV32-NEXT: vse1.v v25, (a1)
; LMULMAX1-RV32-NEXT: addi a0, a0, 2
; LMULMAX1-RV32-NEXT: vse1.v v25, (a0)
; LMULMAX1-RV32-NEXT: ret
;
; LMULMAX1-RV64-LABEL: splat_ones_v64i1:
; LMULMAX1-RV64: # %bb.0:
; LMULMAX1-RV64-NEXT: addi a1, a0, 6
; LMULMAX1-RV64-NEXT: vsetivli a2, 16, e8,m1,ta,mu
; LMULMAX1-RV64-NEXT: vsetivli a1, 16, e8,m1,ta,mu
; LMULMAX1-RV64-NEXT: vmset.m v25
; LMULMAX1-RV64-NEXT: vse1.v v25, (a0)
; LMULMAX1-RV64-NEXT: addi a1, a0, 6
; LMULMAX1-RV64-NEXT: vse1.v v25, (a1)
; LMULMAX1-RV64-NEXT: addi a1, a0, 4
; LMULMAX1-RV64-NEXT: vse1.v v25, (a1)
; LMULMAX1-RV64-NEXT: addi a1, a0, 2
; LMULMAX1-RV64-NEXT: vse1.v v25, (a1)
; LMULMAX1-RV64-NEXT: addi a0, a0, 2
; LMULMAX1-RV64-NEXT: vse1.v v25, (a0)
; LMULMAX1-RV64-NEXT: ret
store <64 x i1> <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>, <64 x i1>* %x

@ -2181,11 +2181,10 @@ define <32 x i8> @mgather_baseidx_v32i8(i8* %base, <32 x i8> %idxs, <32 x i1> %m
; RV64: # %bb.0:
; RV64-NEXT: vmv1r.v v25, v0
; RV64-NEXT: vsetivli a1, 16, e8,m2,ta,mu
; RV64-NEXT: vslidedown.vi v26, v8, 16
; RV64-NEXT: vsetivli a1, 16, e64,m8,ta,mu
; RV64-NEXT: vsext.vf8 v16, v26
; RV64-NEXT: vsetivli a1, 16, e8,m2,ta,mu
; RV64-NEXT: vslidedown.vi v26, v10, 16
; RV64-NEXT: vslidedown.vi v28, v8, 16
; RV64-NEXT: vsetivli a1, 16, e64,m8,ta,mu
; RV64-NEXT: vsext.vf8 v16, v28
; RV64-NEXT: vsetivli a1, 2, e8,m1,ta,mu
; RV64-NEXT: vslidedown.vi v0, v0, 2
; RV64-NEXT: vsetivli a1, 16, e8,m1,tu,mu

@ -1903,15 +1903,14 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, i8* %base, <32 x i8> %idxs,
; RV64-NEXT: vsetivli a1, 8, e8,m1,ta,mu
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: vsetivli a1, 16, e8,m2,ta,mu
; RV64-NEXT: vslidedown.vi v26, v10, 16
; RV64-NEXT: vsetivli a1, 16, e64,m8,ta,mu
; RV64-NEXT: vsext.vf8 v16, v26
; RV64-NEXT: vsetivli a1, 16, e8,m2,ta,mu
; RV64-NEXT: vslidedown.vi v26, v8, 16
; RV64-NEXT: vslidedown.vi v28, v10, 16
; RV64-NEXT: vsetivli a1, 16, e64,m8,ta,mu
; RV64-NEXT: vsext.vf8 v8, v28
; RV64-NEXT: vsetivli a1, 2, e8,m1,ta,mu
; RV64-NEXT: vslidedown.vi v0, v0, 2
; RV64-NEXT: vsetivli a1, 8, e8,m1,ta,mu
; RV64-NEXT: vsoxei64.v v26, (a0), v16, v0.t
; RV64-NEXT: vsoxei64.v v26, (a0), v8, v0.t
; RV64-NEXT: ret
%ptrs = getelementptr inbounds i8, i8* %base, <32 x i8> %idxs
call void @llvm.masked.scatter.v32i8.v32p0i8(<32 x i8> %val, <32 x i8*> %ptrs, i32 1, <32 x i1> %m)

@ -459,10 +459,10 @@ define i64 @vreduce_add_v1i64(<1 x i64>* %x) {
; RV32: # %bb.0:
; RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu
; RV32-NEXT: vle64.v v25, (a0)
; RV32-NEXT: addi a0, zero, 32
; RV32-NEXT: vsrl.vx v26, v25, a0
; RV32-NEXT: vmv.x.s a1, v26
; RV32-NEXT: vmv.x.s a0, v25
; RV32-NEXT: addi a1, zero, 32
; RV32-NEXT: vsrl.vx v25, v25, a1
; RV32-NEXT: vmv.x.s a1, v25
; RV32-NEXT: ret
;
; RV64-LABEL: vreduce_add_v1i64:
@ -1165,10 +1165,10 @@ define i64 @vreduce_and_v1i64(<1 x i64>* %x) {
; RV32: # %bb.0:
; RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu
; RV32-NEXT: vle64.v v25, (a0)
; RV32-NEXT: addi a0, zero, 32
; RV32-NEXT: vsrl.vx v26, v25, a0
; RV32-NEXT: vmv.x.s a1, v26
; RV32-NEXT: vmv.x.s a0, v25
; RV32-NEXT: addi a1, zero, 32
; RV32-NEXT: vsrl.vx v25, v25, a1
; RV32-NEXT: vmv.x.s a1, v25
; RV32-NEXT: ret
;
; RV64-LABEL: vreduce_and_v1i64:
@ -1871,10 +1871,10 @@ define i64 @vreduce_or_v1i64(<1 x i64>* %x) {
; RV32: # %bb.0:
; RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu
; RV32-NEXT: vle64.v v25, (a0)
; RV32-NEXT: addi a0, zero, 32
; RV32-NEXT: vsrl.vx v26, v25, a0
; RV32-NEXT: vmv.x.s a1, v26
; RV32-NEXT: vmv.x.s a0, v25
; RV32-NEXT: addi a1, zero, 32
; RV32-NEXT: vsrl.vx v25, v25, a1
; RV32-NEXT: vmv.x.s a1, v25
; RV32-NEXT: ret
;
; RV64-LABEL: vreduce_or_v1i64:
@ -2577,10 +2577,10 @@ define i64 @vreduce_xor_v1i64(<1 x i64>* %x) {
; RV32: # %bb.0:
; RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu
; RV32-NEXT: vle64.v v25, (a0)
; RV32-NEXT: addi a0, zero, 32
; RV32-NEXT: vsrl.vx v26, v25, a0
; RV32-NEXT: vmv.x.s a1, v26
; RV32-NEXT: vmv.x.s a0, v25
; RV32-NEXT: addi a1, zero, 32
; RV32-NEXT: vsrl.vx v25, v25, a1
; RV32-NEXT: vmv.x.s a1, v25
; RV32-NEXT: ret
;
; RV64-LABEL: vreduce_xor_v1i64:
@ -3505,10 +3505,10 @@ define i64 @vreduce_smin_v1i64(<1 x i64>* %x) {
; RV32: # %bb.0:
; RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu
; RV32-NEXT: vle64.v v25, (a0)
; RV32-NEXT: addi a0, zero, 32
; RV32-NEXT: vsrl.vx v26, v25, a0
; RV32-NEXT: vmv.x.s a1, v26
; RV32-NEXT: vmv.x.s a0, v25
; RV32-NEXT: addi a1, zero, 32
; RV32-NEXT: vsrl.vx v25, v25, a1
; RV32-NEXT: vmv.x.s a1, v25
; RV32-NEXT: ret
;
; RV64-LABEL: vreduce_smin_v1i64:
@ -4297,10 +4297,10 @@ define i64 @vreduce_smax_v1i64(<1 x i64>* %x) {
; RV32: # %bb.0:
; RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu
; RV32-NEXT: vle64.v v25, (a0)
; RV32-NEXT: addi a0, zero, 32
; RV32-NEXT: vsrl.vx v26, v25, a0
; RV32-NEXT: vmv.x.s a1, v26
; RV32-NEXT: vmv.x.s a0, v25
; RV32-NEXT: addi a1, zero, 32
; RV32-NEXT: vsrl.vx v25, v25, a1
; RV32-NEXT: vmv.x.s a1, v25
; RV32-NEXT: ret
;
; RV64-LABEL: vreduce_smax_v1i64:
@ -5056,10 +5056,10 @@ define i64 @vreduce_umin_v1i64(<1 x i64>* %x) {
; RV32: # %bb.0:
; RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu
; RV32-NEXT: vle64.v v25, (a0)
; RV32-NEXT: addi a0, zero, 32
; RV32-NEXT: vsrl.vx v26, v25, a0
; RV32-NEXT: vmv.x.s a1, v26
; RV32-NEXT: vmv.x.s a0, v25
; RV32-NEXT: addi a1, zero, 32
; RV32-NEXT: vsrl.vx v25, v25, a1
; RV32-NEXT: vmv.x.s a1, v25
; RV32-NEXT: ret
;
; RV64-LABEL: vreduce_umin_v1i64:
@ -5762,10 +5762,10 @@ define i64 @vreduce_umax_v1i64(<1 x i64>* %x) {
; RV32: # %bb.0:
; RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu
; RV32-NEXT: vle64.v v25, (a0)
; RV32-NEXT: addi a0, zero, 32
; RV32-NEXT: vsrl.vx v26, v25, a0
; RV32-NEXT: vmv.x.s a1, v26
; RV32-NEXT: vmv.x.s a0, v25
; RV32-NEXT: addi a1, zero, 32
; RV32-NEXT: vsrl.vx v25, v25, a1
; RV32-NEXT: vmv.x.s a1, v25
; RV32-NEXT: ret
;
; RV64-LABEL: vreduce_umax_v1i64:

@ -199,9 +199,9 @@ declare <4 x i64> @llvm.experimental.stepvector.v4i64()
define <4 x i64> @stepvector_v4i64() {
; LMULMAX1-LABEL: stepvector_v4i64:
; LMULMAX1: # %bb.0:
; LMULMAX1-NEXT: addi a0, zero, 2
; LMULMAX1-NEXT: vsetivli a1, 2, e64,m1,ta,mu
; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
; LMULMAX1-NEXT: vmv.v.i v9, 3
; LMULMAX1-NEXT: addi a0, zero, 2
; LMULMAX1-NEXT: vmv.s.x v9, a0
; LMULMAX1-NEXT: vid.v v8
; LMULMAX1-NEXT: ret
@ -220,16 +220,16 @@ declare <8 x i64> @llvm.experimental.stepvector.v8i64()
define <8 x i64> @stepvector_v8i64() {
; LMULMAX1-LABEL: stepvector_v8i64:
; LMULMAX1: # %bb.0:
; LMULMAX1-NEXT: addi a0, zero, 2
; LMULMAX1-NEXT: vsetivli a1, 2, e64,m1,ta,mu
; LMULMAX1-NEXT: vmv.v.i v10, 5
; LMULMAX1-NEXT: addi a1, zero, 4
; LMULMAX1-NEXT: vmv.s.x v10, a1
; LMULMAX1-NEXT: vmv.v.i v11, 7
; LMULMAX1-NEXT: addi a1, zero, 6
; LMULMAX1-NEXT: vmv.s.x v11, a1
; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
; LMULMAX1-NEXT: vmv.v.i v9, 3
; LMULMAX1-NEXT: addi a0, zero, 2
; LMULMAX1-NEXT: vmv.s.x v9, a0
; LMULMAX1-NEXT: vmv.v.i v10, 5
; LMULMAX1-NEXT: addi a0, zero, 4
; LMULMAX1-NEXT: vmv.s.x v10, a0
; LMULMAX1-NEXT: vmv.v.i v11, 7
; LMULMAX1-NEXT: addi a0, zero, 6
; LMULMAX1-NEXT: vmv.s.x v11, a0
; LMULMAX1-NEXT: vid.v v8
; LMULMAX1-NEXT: ret
;
@ -250,28 +250,28 @@ declare <16 x i64> @llvm.experimental.stepvector.v16i64()
define <16 x i64> @stepvector_v16i64() {
; LMULMAX1-LABEL: stepvector_v16i64:
; LMULMAX1: # %bb.0:
; LMULMAX1-NEXT: addi a0, zero, 2
; LMULMAX1-NEXT: vsetivli a1, 2, e64,m1,ta,mu
; LMULMAX1-NEXT: vmv.v.i v10, 5
; LMULMAX1-NEXT: addi a1, zero, 4
; LMULMAX1-NEXT: vmv.s.x v10, a1
; LMULMAX1-NEXT: vmv.v.i v11, 7
; LMULMAX1-NEXT: addi a1, zero, 6
; LMULMAX1-NEXT: vmv.s.x v11, a1
; LMULMAX1-NEXT: vmv.v.i v12, 9
; LMULMAX1-NEXT: addi a1, zero, 8
; LMULMAX1-NEXT: vmv.s.x v12, a1
; LMULMAX1-NEXT: vmv.v.i v13, 11
; LMULMAX1-NEXT: addi a1, zero, 10
; LMULMAX1-NEXT: vmv.s.x v13, a1
; LMULMAX1-NEXT: vmv.v.i v14, 13
; LMULMAX1-NEXT: addi a1, zero, 12
; LMULMAX1-NEXT: vmv.s.x v14, a1
; LMULMAX1-NEXT: vmv.v.i v15, 15
; LMULMAX1-NEXT: addi a1, zero, 14
; LMULMAX1-NEXT: vmv.s.x v15, a1
; LMULMAX1-NEXT: vsetivli a0, 2, e64,m1,ta,mu
; LMULMAX1-NEXT: vmv.v.i v9, 3
; LMULMAX1-NEXT: addi a0, zero, 2
; LMULMAX1-NEXT: vmv.s.x v9, a0
; LMULMAX1-NEXT: vmv.v.i v10, 5
; LMULMAX1-NEXT: addi a0, zero, 4
; LMULMAX1-NEXT: vmv.s.x v10, a0
; LMULMAX1-NEXT: vmv.v.i v11, 7
; LMULMAX1-NEXT: addi a0, zero, 6
; LMULMAX1-NEXT: vmv.s.x v11, a0
; LMULMAX1-NEXT: vmv.v.i v12, 9
; LMULMAX1-NEXT: addi a0, zero, 8
; LMULMAX1-NEXT: vmv.s.x v12, a0
; LMULMAX1-NEXT: vmv.v.i v13, 11
; LMULMAX1-NEXT: addi a0, zero, 10
; LMULMAX1-NEXT: vmv.s.x v13, a0
; LMULMAX1-NEXT: vmv.v.i v14, 13
; LMULMAX1-NEXT: addi a0, zero, 12
; LMULMAX1-NEXT: vmv.s.x v14, a0
; LMULMAX1-NEXT: vmv.v.i v15, 15
; LMULMAX1-NEXT: addi a0, zero, 14
; LMULMAX1-NEXT: vmv.s.x v15, a0
; LMULMAX1-NEXT: vid.v v8
; LMULMAX1-NEXT: ret
;

@ -15,9 +15,8 @@ define i64 @test(<vscale x 1 x i64> %0) nounwind {
; CHECK: bb.0.entry:
; CHECK: liveins: $v8
; CHECK: [[COPY:%[0-9]+]]:vr = COPY $v8
; CHECK: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 1
; CHECK: dead %3:gpr = PseudoVSETIVLI 1, 88, implicit-def $vl, implicit-def $vtype
; CHECK: PseudoVSE64_V_M1 [[COPY]], %stack.0.a, $noreg, 64, implicit $vl, implicit $vtype
; CHECK: dead %2:gpr = PseudoVSETIVLI 1, 88, implicit-def $vl, implicit-def $vtype
; CHECK: PseudoVSE64_V_M1 [[COPY]], %stack.0.a, 1, 64, implicit $vl, implicit $vtype
; CHECK: [[LD:%[0-9]+]]:gpr = LD %stack.0.a, 0 :: (dereferenceable load 8 from %ir.a)
; CHECK: $x10 = COPY [[LD]]
; CHECK: PseudoRET implicit $x10

@ -576,8 +576,8 @@ define void @test_urem_vec(<3 x i11>* %X) nounwind {
; RV32MV-NEXT: vsub.vv v25, v25, v27
; RV32MV-NEXT: vmul.vv v25, v25, v26
; RV32MV-NEXT: vsll.vi v26, v25, 1
; RV32MV-NEXT: vmv.v.i v27, 10
; RV32MV-NEXT: addi a1, zero, 9
; RV32MV-NEXT: vmv.v.i v27, 10
; RV32MV-NEXT: vmv.s.x v27, a1
; RV32MV-NEXT: vsll.vv v26, v26, v27
; RV32MV-NEXT: addi a1, zero, 2047
@ -637,8 +637,8 @@ define void @test_urem_vec(<3 x i11>* %X) nounwind {
; RV64MV-NEXT: vsub.vv v25, v25, v27
; RV64MV-NEXT: vmul.vv v25, v25, v26
; RV64MV-NEXT: vsll.vi v26, v25, 1
; RV64MV-NEXT: vmv.v.i v27, 10
; RV64MV-NEXT: addi a1, zero, 9
; RV64MV-NEXT: vmv.v.i v27, 10
; RV64MV-NEXT: vmv.s.x v27, a1
; RV64MV-NEXT: vsll.vv v26, v26, v27
; RV64MV-NEXT: addi a1, zero, 2047