[RISCV] Improve 64-bit integer materialization for some cases.

This adds a new integer materialization strategy mainly targeted
at 64-bit constants like 0xffffffff that have 32 or more trailing
ones with nothing but leading zeros above them. We can materialize
these with an addi -1 followed by an srli to restore the leading
zeros. This matches what gcc does.
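
For example, for 0xffffffff (the imm64_2 test updated below) the
sequence shrinks from three instructions to two:

  # before
  addi a0, zero, 1
  slli a0, a0, 32
  addi a0, a0, -1
  # after
  addi a0, zero, -1
  srli a0, a0, 32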

I haven't limited it to just these cases, though. The implementation
takes the constant, shifts out all of the leading zeros, shifts ones
into the vacated LSBs, builds a sequence for that new constant,
appends an srli to shift the zeros back in, and keeps the result only
if it is shorter than the sequence from our original strategy.
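
Concretely, for 0xFFFF_FFFF_F001 (the imm_right_shifted_lui_1 test
updated below) the search plays out like this; the intermediate
values are a hand-worked trace, with a0 taken from that test:

  # shift out the 16 leading zeros:        0xFFFF_FFFF_F001_0000
  # fill the vacated low bits with ones:   0xFFFF_FFFF_F001_FFFF
  # materialize that value, then append an srli by 16:
  lui   a0, 983072      # 0xFFFF_FFFF_F002_0000
  addiw a0, a0, -1      # 0xFFFF_FFFF_F001_FFFF
  srli  a0, a0, 16      # 0x0000_FFFF_FFFF_F001
  # 3 instructions instead of the previous 5, so the new sequence is kept.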

I've separated the recursive portion into a standalone function
so that I could append the new strategy outside of the recursion.
Since external callers no longer use the recursive function, I've
cleaned up the external interface to return the sequence instead of
taking a vector by reference.

Reviewed By: asb

Differential Revision: https://reviews.llvm.org/D98821
Craig Topper 2021-04-01 08:51:28 -07:00
parent 9783437885
commit d61b40ed27
23 changed files with 250 additions and 248 deletions

View File

@ -2201,8 +2201,7 @@ void RISCVAsmParser::emitToStreamer(MCStreamer &S, const MCInst &Inst) {
void RISCVAsmParser::emitLoadImm(MCRegister DestReg, int64_t Value,
MCStreamer &Out) {
RISCVMatInt::InstSeq Seq;
RISCVMatInt::generateInstSeq(Value, isRV64(), Seq);
RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Value, isRV64());
MCRegister SrcReg = RISCV::X0;
for (RISCVMatInt::Inst &Inst : Seq) {

View File

@ -10,11 +10,11 @@
#include "MCTargetDesc/RISCVMCTargetDesc.h"
#include "llvm/ADT/APInt.h"
#include "llvm/Support/MathExtras.h"
using namespace llvm;
namespace llvm {
namespace RISCVMatInt {
void generateInstSeq(int64_t Val, bool IsRV64, InstSeq &Res) {
// Recursively generate a sequence for materializing an integer.
static void generateInstSeqImpl(int64_t Val, bool IsRV64,
RISCVMatInt::InstSeq &Res) {
if (isInt<32>(Val)) {
// Depending on the active bits in the immediate Value v, the following
// instruction sequences are emitted:
@ -27,11 +27,11 @@ void generateInstSeq(int64_t Val, bool IsRV64, InstSeq &Res) {
int64_t Lo12 = SignExtend64<12>(Val);
if (Hi20)
Res.push_back(Inst(RISCV::LUI, Hi20));
Res.push_back(RISCVMatInt::Inst(RISCV::LUI, Hi20));
if (Lo12 || Hi20 == 0) {
unsigned AddiOpc = (IsRV64 && Hi20) ? RISCV::ADDIW : RISCV::ADDI;
Res.push_back(Inst(AddiOpc, Lo12));
Res.push_back(RISCVMatInt::Inst(AddiOpc, Lo12));
}
return;
}
@ -66,11 +66,40 @@ void generateInstSeq(int64_t Val, bool IsRV64, InstSeq &Res) {
int ShiftAmount = 12 + findFirstSet((uint64_t)Hi52);
Hi52 = SignExtend64(Hi52 >> (ShiftAmount - 12), 64 - ShiftAmount);
generateInstSeq(Hi52, IsRV64, Res);
generateInstSeqImpl(Hi52, IsRV64, Res);
Res.push_back(Inst(RISCV::SLLI, ShiftAmount));
Res.push_back(RISCVMatInt::Inst(RISCV::SLLI, ShiftAmount));
if (Lo12)
Res.push_back(Inst(RISCV::ADDI, Lo12));
Res.push_back(RISCVMatInt::Inst(RISCV::ADDI, Lo12));
}
namespace llvm {
namespace RISCVMatInt {
InstSeq generateInstSeq(int64_t Val, bool IsRV64) {
RISCVMatInt::InstSeq Res;
generateInstSeqImpl(Val, IsRV64, Res);
// If the constant is positive we might be able to generate a shifted constant
// with no leading zeros and use a final SRLI to restore them.
if (Val > 0 && Res.size() > 2) {
assert(IsRV64 && "Expected RV32 to only need 2 instructions");
unsigned ShiftAmount = countLeadingZeros((uint64_t)Val);
Val <<= ShiftAmount;
// Fill in the bits that will be shifted out with 1s. An example where this
// helps is trailing one masks with 32 or more ones. This will generate
// ADDI -1 and an SRLI.
Val |= maskTrailingOnes<uint64_t>(ShiftAmount);
RISCVMatInt::InstSeq TmpSeq;
generateInstSeqImpl(Val, IsRV64, TmpSeq);
TmpSeq.push_back(RISCVMatInt::Inst(RISCV::SRLI, ShiftAmount));
// Keep the new sequence if it is an improvement.
if (TmpSeq.size() < Res.size())
Res = TmpSeq;
}
return Res;
}
int getIntMatCost(const APInt &Val, unsigned Size, bool IsRV64) {
@ -81,8 +110,7 @@ int getIntMatCost(const APInt &Val, unsigned Size, bool IsRV64) {
int Cost = 0;
for (unsigned ShiftVal = 0; ShiftVal < Size; ShiftVal += PlatRegSize) {
APInt Chunk = Val.ashr(ShiftVal).sextOrTrunc(PlatRegSize);
InstSeq MatSeq;
generateInstSeq(Chunk.getSExtValue(), IsRV64, MatSeq);
InstSeq MatSeq = generateInstSeq(Chunk.getSExtValue(), IsRV64);
Cost += MatSeq.size();
}
return std::max(1, Cost);

View File

@ -25,11 +25,11 @@ struct Inst {
using InstSeq = SmallVector<Inst, 8>;
// Helper to generate an instruction sequence that will materialise the given
// immediate value into a register. A sequence of instructions represented by
// a simple struct produced rather than directly emitting the instructions in
// immediate value into a register. A sequence of instructions represented by a
// simple struct is produced rather than directly emitting the instructions in
// order to allow this helper to be used from both the MC layer and during
// instruction selection.
void generateInstSeq(int64_t Val, bool IsRV64, InstSeq &Res);
InstSeq generateInstSeq(int64_t Val, bool IsRV64);
// Helper to estimate the number of instructions required to materialise the
// given immediate value into a register. This estimate does not account for

View File

@ -46,8 +46,7 @@ void RISCVDAGToDAGISel::PostprocessISelDAG() {
static SDNode *selectImm(SelectionDAG *CurDAG, const SDLoc &DL, int64_t Imm,
MVT XLenVT) {
RISCVMatInt::InstSeq Seq;
RISCVMatInt::generateInstSeq(Imm, XLenVT == MVT::i64, Seq);
RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Imm, XLenVT == MVT::i64);
SDNode *Result = nullptr;
SDValue SrcReg = CurDAG->getRegister(RISCV::X0, XLenVT);

View File

@ -354,8 +354,7 @@ void RISCVInstrInfo::movImm(MachineBasicBlock &MBB,
if (!IsRV64 && !isInt<32>(Val))
report_fatal_error("Should only materialize 32-bit constants for RV32");
RISCVMatInt::InstSeq Seq;
RISCVMatInt::generateInstSeq(Val, IsRV64, Seq);
RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Val, IsRV64);
assert(Seq.size() > 0);
for (RISCVMatInt::Inst &Inst : Seq) {

View File

@ -39,8 +39,7 @@ define double @fold_promote_d_s(double %a, float %b) nounwind {
; RV64I-LABEL: fold_promote_d_s:
; RV64I: # %bb.0:
; RV64I-NEXT: addi a2, zero, -1
; RV64I-NEXT: slli a2, a2, 63
; RV64I-NEXT: addi a2, a2, -1
; RV64I-NEXT: srli a2, a2, 1
; RV64I-NEXT: and a0, a0, a2
; RV64I-NEXT: lui a2, 524288
; RV64I-NEXT: and a1, a1, a2
@ -111,8 +110,7 @@ define double @fold_promote_d_h(double %a, half %b) nounwind {
; RV64I-LABEL: fold_promote_d_h:
; RV64I: # %bb.0:
; RV64I-NEXT: addi a2, zero, -1
; RV64I-NEXT: slli a2, a2, 63
; RV64I-NEXT: addi a2, a2, -1
; RV64I-NEXT: srli a2, a2, 1
; RV64I-NEXT: and a0, a0, a2
; RV64I-NEXT: lui a2, 8
; RV64I-NEXT: and a1, a1, a2

View File

@ -67,16 +67,14 @@ define double @fabs(double %a) nounwind {
; RV64I-LABEL: fabs:
; RV64I: # %bb.0:
; RV64I-NEXT: addi a1, zero, -1
; RV64I-NEXT: slli a1, a1, 63
; RV64I-NEXT: addi a1, a1, -1
; RV64I-NEXT: srli a1, a1, 1
; RV64I-NEXT: and a0, a0, a1
; RV64I-NEXT: ret
;
; RV64IFD-LABEL: fabs:
; RV64IFD: # %bb.0:
; RV64IFD-NEXT: addi a1, zero, -1
; RV64IFD-NEXT: slli a1, a1, 63
; RV64IFD-NEXT: addi a1, a1, -1
; RV64IFD-NEXT: srli a1, a1, 1
; RV64IFD-NEXT: and a0, a0, a1
; RV64IFD-NEXT: ret
%1 = call double @llvm.fabs.f64(double %a)
@ -120,9 +118,9 @@ define double @fcopysign_fneg(double %a, double %b) nounwind {
; RV64I: # %bb.0:
; RV64I-NEXT: not a1, a1
; RV64I-NEXT: addi a2, zero, -1
; RV64I-NEXT: slli a2, a2, 63
; RV64I-NEXT: and a1, a1, a2
; RV64I-NEXT: addi a2, a2, -1
; RV64I-NEXT: slli a3, a2, 63
; RV64I-NEXT: and a1, a1, a3
; RV64I-NEXT: srli a2, a2, 1
; RV64I-NEXT: and a0, a0, a2
; RV64I-NEXT: or a0, a0, a1
; RV64I-NEXT: ret

View File

@ -385,8 +385,7 @@ define double @fabs_f64(double %a) nounwind {
; RV64IFD-LABEL: fabs_f64:
; RV64IFD: # %bb.0:
; RV64IFD-NEXT: addi a1, zero, -1
; RV64IFD-NEXT: slli a1, a1, 63
; RV64IFD-NEXT: addi a1, a1, -1
; RV64IFD-NEXT: srli a1, a1, 1
; RV64IFD-NEXT: and a0, a0, a1
; RV64IFD-NEXT: ret
%1 = call double @llvm.fabs.f64(double %a)

View File

@ -111,8 +111,7 @@ define double @bitcast_double_and(double %a1, double %a2) nounwind {
; RV64F-NEXT: mv s0, a0
; RV64F-NEXT: call __adddf3@plt
; RV64F-NEXT: addi a1, zero, -1
; RV64F-NEXT: slli a1, a1, 63
; RV64F-NEXT: addi a1, a1, -1
; RV64F-NEXT: srli a1, a1, 1
; RV64F-NEXT: and a1, a0, a1
; RV64F-NEXT: mv a0, s0
; RV64F-NEXT: call __adddf3@plt

View File

@ -183,9 +183,8 @@ define i64 @imm64_2() nounwind {
;
; RV64I-LABEL: imm64_2:
; RV64I: # %bb.0:
; RV64I-NEXT: addi a0, zero, 1
; RV64I-NEXT: slli a0, a0, 32
; RV64I-NEXT: addi a0, a0, -1
; RV64I-NEXT: addi a0, zero, -1
; RV64I-NEXT: srli a0, a0, 32
; RV64I-NEXT: ret
ret i64 4294967295 ; 0xFFFF_FFFF
}
@ -376,11 +375,9 @@ define i64 @imm_right_shifted_lui_1() nounwind {
;
; RV64I-LABEL: imm_right_shifted_lui_1:
; RV64I: # %bb.0:
; RV64I-NEXT: addi a0, zero, 1
; RV64I-NEXT: slli a0, a0, 36
; RV64I-NEXT: addi a0, a0, -1
; RV64I-NEXT: slli a0, a0, 12
; RV64I-NEXT: addi a0, a0, 1
; RV64I-NEXT: lui a0, 983072
; RV64I-NEXT: addiw a0, a0, -1
; RV64I-NEXT: srli a0, a0, 16
; RV64I-NEXT: ret
ret i64 281474976706561 ; 0xFFFF_FFFF_F001
}

View File

@ -932,18 +932,15 @@ define i64 @gorc16_i64(i64 %a) nounwind {
; RV64I-NEXT: slli a1, a0, 16
; RV64I-NEXT: lui a2, 1048560
; RV64I-NEXT: addiw a2, a2, 1
; RV64I-NEXT: slli a2, a2, 16
; RV64I-NEXT: addi a2, a2, -1
; RV64I-NEXT: slli a2, a2, 16
; RV64I-NEXT: and a1, a1, a2
; RV64I-NEXT: srli a2, a0, 16
; RV64I-NEXT: lui a3, 16
; RV64I-NEXT: addiw a3, a3, -1
; RV64I-NEXT: slli a3, a3, 16
; RV64I-NEXT: addi a3, a3, 1
; RV64I-NEXT: slli a3, a3, 16
; RV64I-NEXT: slli a3, a2, 16
; RV64I-NEXT: addi a3, a3, -1
; RV64I-NEXT: and a2, a2, a3
; RV64I-NEXT: slli a3, a3, 16
; RV64I-NEXT: and a1, a1, a3
; RV64I-NEXT: srli a3, a0, 16
; RV64I-NEXT: slli a2, a2, 32
; RV64I-NEXT: addi a2, a2, -1
; RV64I-NEXT: srli a2, a2, 16
; RV64I-NEXT: and a2, a3, a2
; RV64I-NEXT: or a0, a2, a0
; RV64I-NEXT: or a0, a0, a1
; RV64I-NEXT: ret
@ -2165,17 +2162,14 @@ define i64 @grev16_i64(i64 %a) nounwind {
; RV64I-NEXT: slli a1, a0, 16
; RV64I-NEXT: lui a2, 1048560
; RV64I-NEXT: addiw a2, a2, 1
; RV64I-NEXT: slli a2, a2, 16
; RV64I-NEXT: addi a2, a2, -1
; RV64I-NEXT: slli a2, a2, 16
; RV64I-NEXT: and a1, a1, a2
; RV64I-NEXT: slli a3, a2, 16
; RV64I-NEXT: addi a3, a3, -1
; RV64I-NEXT: slli a3, a3, 16
; RV64I-NEXT: and a1, a1, a3
; RV64I-NEXT: srli a0, a0, 16
; RV64I-NEXT: lui a2, 16
; RV64I-NEXT: addiw a2, a2, -1
; RV64I-NEXT: slli a2, a2, 16
; RV64I-NEXT: addi a2, a2, 1
; RV64I-NEXT: slli a2, a2, 16
; RV64I-NEXT: slli a2, a2, 32
; RV64I-NEXT: addi a2, a2, -1
; RV64I-NEXT: srli a2, a2, 16
; RV64I-NEXT: and a0, a0, a2
; RV64I-NEXT: or a0, a1, a0
; RV64I-NEXT: ret

View File

@ -783,8 +783,7 @@ define i64 @sbclri_i64_63(i64 %a) nounwind {
; RV64I-LABEL: sbclri_i64_63:
; RV64I: # %bb.0:
; RV64I-NEXT: addi a1, zero, -1
; RV64I-NEXT: slli a1, a1, 63
; RV64I-NEXT: addi a1, a1, -1
; RV64I-NEXT: srli a1, a1, 1
; RV64I-NEXT: and a0, a0, a1
; RV64I-NEXT: ret
;

View File

@ -3552,8 +3552,7 @@ define i64 @vreduce_smin_v2i64(<2 x i64>* %x) {
; RV64-NEXT: vsetivli a1, 2, e64,m1,ta,mu
; RV64-NEXT: vle64.v v25, (a0)
; RV64-NEXT: addi a0, zero, -1
; RV64-NEXT: slli a0, a0, 63
; RV64-NEXT: addi a0, a0, -1
; RV64-NEXT: srli a0, a0, 1
; RV64-NEXT: vsetvli a1, zero, e64,m1,ta,mu
; RV64-NEXT: vmv.v.x v26, a0
; RV64-NEXT: vsetivli a0, 2, e64,m1,ta,mu
@ -3596,8 +3595,7 @@ define i64 @vreduce_smin_v4i64(<4 x i64>* %x) {
; RV64-NEXT: vsetivli a1, 4, e64,m2,ta,mu
; RV64-NEXT: vle64.v v26, (a0)
; RV64-NEXT: addi a0, zero, -1
; RV64-NEXT: slli a0, a0, 63
; RV64-NEXT: addi a0, a0, -1
; RV64-NEXT: srli a0, a0, 1
; RV64-NEXT: vsetvli a1, zero, e64,m1,ta,mu
; RV64-NEXT: vmv.v.x v25, a0
; RV64-NEXT: vsetivli a0, 4, e64,m2,ta,mu
@ -3641,8 +3639,7 @@ define i64 @vreduce_smin_v8i64(<8 x i64>* %x) {
; RV64-NEXT: vsetivli a1, 8, e64,m4,ta,mu
; RV64-NEXT: vle64.v v28, (a0)
; RV64-NEXT: addi a0, zero, -1
; RV64-NEXT: slli a0, a0, 63
; RV64-NEXT: addi a0, a0, -1
; RV64-NEXT: srli a0, a0, 1
; RV64-NEXT: vsetvli a1, zero, e64,m1,ta,mu
; RV64-NEXT: vmv.v.x v25, a0
; RV64-NEXT: vsetivli a0, 8, e64,m4,ta,mu
@ -3686,8 +3683,7 @@ define i64 @vreduce_smin_v16i64(<16 x i64>* %x) {
; RV64-NEXT: vsetivli a1, 16, e64,m8,ta,mu
; RV64-NEXT: vle64.v v8, (a0)
; RV64-NEXT: addi a0, zero, -1
; RV64-NEXT: slli a0, a0, 63
; RV64-NEXT: addi a0, a0, -1
; RV64-NEXT: srli a0, a0, 1
; RV64-NEXT: vsetvli a1, zero, e64,m1,ta,mu
; RV64-NEXT: vmv.v.x v25, a0
; RV64-NEXT: vsetivli a0, 16, e64,m8,ta,mu
@ -3737,8 +3733,7 @@ define i64 @vreduce_smin_v32i64(<32 x i64>* %x) {
; RV64-NEXT: vle64.v v16, (a0)
; RV64-NEXT: vmin.vv v8, v8, v16
; RV64-NEXT: addi a0, zero, -1
; RV64-NEXT: slli a0, a0, 63
; RV64-NEXT: addi a0, a0, -1
; RV64-NEXT: srli a0, a0, 1
; RV64-NEXT: vsetvli a1, zero, e64,m1,ta,mu
; RV64-NEXT: vmv.v.x v25, a0
; RV64-NEXT: vsetivli a0, 16, e64,m8,ta,mu
@ -3800,8 +3795,7 @@ define i64 @vreduce_smin_v64i64(<64 x i64>* %x) nounwind {
; RV64-NEXT: vmin.vv v8, v8, v0
; RV64-NEXT: vmin.vv v8, v8, v16
; RV64-NEXT: addi a0, zero, -1
; RV64-NEXT: slli a0, a0, 63
; RV64-NEXT: addi a0, a0, -1
; RV64-NEXT: srli a0, a0, 1
; RV64-NEXT: vsetvli a1, zero, e64,m1,ta,mu
; RV64-NEXT: vmv.v.x v25, a0
; RV64-NEXT: vsetivli a0, 16, e64,m8,ta,mu

View File

@ -1209,8 +1209,7 @@ define i64 @vreduce_smin_nxv1i64(<vscale x 1 x i64> %v) {
; CHECK-LABEL: vreduce_smin_nxv1i64:
; CHECK: # %bb.0:
; CHECK-NEXT: addi a0, zero, -1
; CHECK-NEXT: slli a0, a0, 63
; CHECK-NEXT: addi a0, a0, -1
; CHECK-NEXT: srli a0, a0, 1
; CHECK-NEXT: vsetvli a1, zero, e64,m1,ta,mu
; CHECK-NEXT: vmv.v.x v25, a0
; CHECK-NEXT: vredmin.vs v25, v8, v25
@ -1334,8 +1333,7 @@ define i64 @vreduce_smin_nxv2i64(<vscale x 2 x i64> %v) {
; CHECK-LABEL: vreduce_smin_nxv2i64:
; CHECK: # %bb.0:
; CHECK-NEXT: addi a0, zero, -1
; CHECK-NEXT: slli a0, a0, 63
; CHECK-NEXT: addi a0, a0, -1
; CHECK-NEXT: srli a0, a0, 1
; CHECK-NEXT: vsetvli a1, zero, e64,m1,ta,mu
; CHECK-NEXT: vmv.v.x v25, a0
; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu
@ -1467,8 +1465,7 @@ define i64 @vreduce_smin_nxv4i64(<vscale x 4 x i64> %v) {
; CHECK-LABEL: vreduce_smin_nxv4i64:
; CHECK: # %bb.0:
; CHECK-NEXT: addi a0, zero, -1
; CHECK-NEXT: slli a0, a0, 63
; CHECK-NEXT: addi a0, a0, -1
; CHECK-NEXT: srli a0, a0, 1
; CHECK-NEXT: vsetvli a1, zero, e64,m1,ta,mu
; CHECK-NEXT: vmv.v.x v25, a0
; CHECK-NEXT: vsetvli a0, zero, e64,m4,ta,mu

View File

@ -117,20 +117,22 @@ define i64 @func2(i64 %x, i64 %y) nounwind {
;
; RV64I-LABEL: func2:
; RV64I: # %bb.0:
; RV64I-NEXT: mv a2, a0
; RV64I-NEXT: add a3, a0, a1
; RV64I-NEXT: addi a0, zero, -1
; RV64I-NEXT: slli a0, a0, 63
; RV64I-NEXT: bgez a3, .LBB1_2
; RV64I-NEXT: addi a2, zero, -1
; RV64I-NEXT: bltz a3, .LBB1_2
; RV64I-NEXT: # %bb.1:
; RV64I-NEXT: addi a0, a0, -1
; RV64I-NEXT: slli a2, a2, 63
; RV64I-NEXT: j .LBB1_3
; RV64I-NEXT: .LBB1_2:
; RV64I-NEXT: slt a2, a3, a2
; RV64I-NEXT: srli a2, a2, 1
; RV64I-NEXT: .LBB1_3:
; RV64I-NEXT: slt a0, a3, a0
; RV64I-NEXT: slti a1, a1, 0
; RV64I-NEXT: bne a1, a2, .LBB1_4
; RV64I-NEXT: # %bb.3:
; RV64I-NEXT: mv a0, a3
; RV64I-NEXT: .LBB1_4:
; RV64I-NEXT: bne a1, a0, .LBB1_5
; RV64I-NEXT: # %bb.4:
; RV64I-NEXT: mv a2, a3
; RV64I-NEXT: .LBB1_5:
; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: ret
;
; RV32IZbbNOZbt-LABEL: func2:
@ -159,20 +161,22 @@ define i64 @func2(i64 %x, i64 %y) nounwind {
;
; RV64IZbbNOZbt-LABEL: func2:
; RV64IZbbNOZbt: # %bb.0:
; RV64IZbbNOZbt-NEXT: mv a2, a0
; RV64IZbbNOZbt-NEXT: add a3, a0, a1
; RV64IZbbNOZbt-NEXT: addi a0, zero, -1
; RV64IZbbNOZbt-NEXT: slli a0, a0, 63
; RV64IZbbNOZbt-NEXT: bgez a3, .LBB1_2
; RV64IZbbNOZbt-NEXT: addi a2, zero, -1
; RV64IZbbNOZbt-NEXT: bltz a3, .LBB1_2
; RV64IZbbNOZbt-NEXT: # %bb.1:
; RV64IZbbNOZbt-NEXT: addi a0, a0, -1
; RV64IZbbNOZbt-NEXT: slli a2, a2, 63
; RV64IZbbNOZbt-NEXT: j .LBB1_3
; RV64IZbbNOZbt-NEXT: .LBB1_2:
; RV64IZbbNOZbt-NEXT: slt a2, a3, a2
; RV64IZbbNOZbt-NEXT: srli a2, a2, 1
; RV64IZbbNOZbt-NEXT: .LBB1_3:
; RV64IZbbNOZbt-NEXT: slt a0, a3, a0
; RV64IZbbNOZbt-NEXT: slti a1, a1, 0
; RV64IZbbNOZbt-NEXT: bne a1, a2, .LBB1_4
; RV64IZbbNOZbt-NEXT: # %bb.3:
; RV64IZbbNOZbt-NEXT: mv a0, a3
; RV64IZbbNOZbt-NEXT: .LBB1_4:
; RV64IZbbNOZbt-NEXT: bne a1, a0, .LBB1_5
; RV64IZbbNOZbt-NEXT: # %bb.4:
; RV64IZbbNOZbt-NEXT: mv a2, a3
; RV64IZbbNOZbt-NEXT: .LBB1_5:
; RV64IZbbNOZbt-NEXT: mv a0, a2
; RV64IZbbNOZbt-NEXT: ret
;
; RV32IZbbZbt-LABEL: func2:
@ -199,9 +203,9 @@ define i64 @func2(i64 %x, i64 %y) nounwind {
; RV64IZbbZbt-NEXT: add a2, a0, a1
; RV64IZbbZbt-NEXT: slti a3, a2, 0
; RV64IZbbZbt-NEXT: addi a4, zero, -1
; RV64IZbbZbt-NEXT: slli a4, a4, 63
; RV64IZbbZbt-NEXT: addi a5, a4, -1
; RV64IZbbZbt-NEXT: cmov a3, a3, a5, a4
; RV64IZbbZbt-NEXT: slli a5, a4, 63
; RV64IZbbZbt-NEXT: srli a4, a4, 1
; RV64IZbbZbt-NEXT: cmov a3, a3, a4, a5
; RV64IZbbZbt-NEXT: slt a0, a2, a0
; RV64IZbbZbt-NEXT: slti a1, a1, 0
; RV64IZbbZbt-NEXT: xor a0, a1, a0

View File

@ -125,20 +125,22 @@ define i64 @func64(i64 %x, i64 %y, i64 %z) nounwind {
;
; RV64I-LABEL: func64:
; RV64I: # %bb.0:
; RV64I-NEXT: mv a1, a0
; RV64I-NEXT: add a3, a0, a2
; RV64I-NEXT: addi a0, zero, -1
; RV64I-NEXT: slli a0, a0, 63
; RV64I-NEXT: bgez a3, .LBB1_2
; RV64I-NEXT: addi a1, zero, -1
; RV64I-NEXT: bltz a3, .LBB1_2
; RV64I-NEXT: # %bb.1:
; RV64I-NEXT: addi a0, a0, -1
; RV64I-NEXT: slli a1, a1, 63
; RV64I-NEXT: j .LBB1_3
; RV64I-NEXT: .LBB1_2:
; RV64I-NEXT: slt a1, a3, a1
; RV64I-NEXT: srli a1, a1, 1
; RV64I-NEXT: .LBB1_3:
; RV64I-NEXT: slt a0, a3, a0
; RV64I-NEXT: slti a2, a2, 0
; RV64I-NEXT: bne a2, a1, .LBB1_4
; RV64I-NEXT: # %bb.3:
; RV64I-NEXT: mv a0, a3
; RV64I-NEXT: .LBB1_4:
; RV64I-NEXT: bne a2, a0, .LBB1_5
; RV64I-NEXT: # %bb.4:
; RV64I-NEXT: mv a1, a3
; RV64I-NEXT: .LBB1_5:
; RV64I-NEXT: mv a0, a1
; RV64I-NEXT: ret
;
; RV32IZbbNOZbt-LABEL: func64:
@ -167,20 +169,22 @@ define i64 @func64(i64 %x, i64 %y, i64 %z) nounwind {
;
; RV64IZbbNOZbt-LABEL: func64:
; RV64IZbbNOZbt: # %bb.0:
; RV64IZbbNOZbt-NEXT: mv a1, a0
; RV64IZbbNOZbt-NEXT: add a3, a0, a2
; RV64IZbbNOZbt-NEXT: addi a0, zero, -1
; RV64IZbbNOZbt-NEXT: slli a0, a0, 63
; RV64IZbbNOZbt-NEXT: bgez a3, .LBB1_2
; RV64IZbbNOZbt-NEXT: addi a1, zero, -1
; RV64IZbbNOZbt-NEXT: bltz a3, .LBB1_2
; RV64IZbbNOZbt-NEXT: # %bb.1:
; RV64IZbbNOZbt-NEXT: addi a0, a0, -1
; RV64IZbbNOZbt-NEXT: slli a1, a1, 63
; RV64IZbbNOZbt-NEXT: j .LBB1_3
; RV64IZbbNOZbt-NEXT: .LBB1_2:
; RV64IZbbNOZbt-NEXT: slt a1, a3, a1
; RV64IZbbNOZbt-NEXT: srli a1, a1, 1
; RV64IZbbNOZbt-NEXT: .LBB1_3:
; RV64IZbbNOZbt-NEXT: slt a0, a3, a0
; RV64IZbbNOZbt-NEXT: slti a2, a2, 0
; RV64IZbbNOZbt-NEXT: bne a2, a1, .LBB1_4
; RV64IZbbNOZbt-NEXT: # %bb.3:
; RV64IZbbNOZbt-NEXT: mv a0, a3
; RV64IZbbNOZbt-NEXT: .LBB1_4:
; RV64IZbbNOZbt-NEXT: bne a2, a0, .LBB1_5
; RV64IZbbNOZbt-NEXT: # %bb.4:
; RV64IZbbNOZbt-NEXT: mv a1, a3
; RV64IZbbNOZbt-NEXT: .LBB1_5:
; RV64IZbbNOZbt-NEXT: mv a0, a1
; RV64IZbbNOZbt-NEXT: ret
;
; RV32IZbbZbt-LABEL: func64:
@ -207,9 +211,9 @@ define i64 @func64(i64 %x, i64 %y, i64 %z) nounwind {
; RV64IZbbZbt-NEXT: add a1, a0, a2
; RV64IZbbZbt-NEXT: slti a3, a1, 0
; RV64IZbbZbt-NEXT: addi a4, zero, -1
; RV64IZbbZbt-NEXT: slli a4, a4, 63
; RV64IZbbZbt-NEXT: addi a5, a4, -1
; RV64IZbbZbt-NEXT: cmov a3, a3, a5, a4
; RV64IZbbZbt-NEXT: slli a5, a4, 63
; RV64IZbbZbt-NEXT: srli a4, a4, 1
; RV64IZbbZbt-NEXT: cmov a3, a3, a4, a5
; RV64IZbbZbt-NEXT: slt a0, a1, a0
; RV64IZbbZbt-NEXT: slti a2, a2, 0
; RV64IZbbZbt-NEXT: xor a0, a2, a0

View File

@ -407,9 +407,8 @@ define void @test_srem_vec(<3 x i33>* %X) nounwind {
; RV64-NEXT: lwu a1, 8(s0)
; RV64-NEXT: slli a0, a0, 32
; RV64-NEXT: or a0, a1, a0
; RV64-NEXT: addi s4, zero, 1
; RV64-NEXT: slli a1, s4, 40
; RV64-NEXT: addi a1, a1, -1
; RV64-NEXT: addi s4, zero, -1
; RV64-NEXT: srli a1, s4, 24
; RV64-NEXT: and a0, a0, a1
; RV64-NEXT: ld a1, 0(s0)
; RV64-NEXT: slli a2, a0, 29
@ -464,8 +463,7 @@ define void @test_srem_vec(<3 x i33>* %X) nounwind {
; RV64-NEXT: srli a3, a3, 32
; RV64-NEXT: sb a3, 12(s0)
; RV64-NEXT: slli a1, a1, 2
; RV64-NEXT: slli a3, s4, 33
; RV64-NEXT: addi a3, a3, -1
; RV64-NEXT: srli a3, s4, 31
; RV64-NEXT: and a2, a2, a3
; RV64-NEXT: srli a4, a2, 31
; RV64-NEXT: sub a1, a4, a1
@ -574,9 +572,8 @@ define void @test_srem_vec(<3 x i33>* %X) nounwind {
; RV64M-NEXT: lwu a2, 8(a0)
; RV64M-NEXT: slli a1, a1, 32
; RV64M-NEXT: or a2, a2, a1
; RV64M-NEXT: addi a6, zero, 1
; RV64M-NEXT: slli a3, a6, 40
; RV64M-NEXT: addi a3, a3, -1
; RV64M-NEXT: addi a6, zero, -1
; RV64M-NEXT: srli a3, a6, 24
; RV64M-NEXT: and a2, a2, a3
; RV64M-NEXT: ld a3, 0(a0)
; RV64M-NEXT: slli a4, a2, 29
@ -653,8 +650,7 @@ define void @test_srem_vec(<3 x i33>* %X) nounwind {
; RV64M-NEXT: srli a4, a4, 32
; RV64M-NEXT: sb a4, 12(a0)
; RV64M-NEXT: slli a2, a2, 2
; RV64M-NEXT: slli a4, a6, 33
; RV64M-NEXT: addi a4, a4, -1
; RV64M-NEXT: srli a4, a6, 31
; RV64M-NEXT: and a1, a1, a4
; RV64M-NEXT: srli a5, a1, 31
; RV64M-NEXT: sub a2, a5, a2
@ -781,20 +777,19 @@ define void @test_srem_vec(<3 x i33>* %X) nounwind {
; RV64MV-NEXT: lwu a2, 8(a0)
; RV64MV-NEXT: slli a1, a1, 32
; RV64MV-NEXT: or a2, a2, a1
; RV64MV-NEXT: addi a6, zero, 1
; RV64MV-NEXT: slli a3, a6, 40
; RV64MV-NEXT: ld a4, 0(a0)
; RV64MV-NEXT: addi a3, a3, -1
; RV64MV-NEXT: and a2, a2, a3
; RV64MV-NEXT: slli a3, a2, 31
; RV64MV-NEXT: srli a5, a4, 33
; RV64MV-NEXT: or a3, a5, a3
; RV64MV-NEXT: slli a3, a3, 31
; RV64MV-NEXT: srai a3, a3, 31
; RV64MV-NEXT: slli a2, a2, 29
; RV64MV-NEXT: srai a2, a2, 31
; RV64MV-NEXT: addi a6, zero, -1
; RV64MV-NEXT: ld a3, 0(a0)
; RV64MV-NEXT: srli a4, a6, 24
; RV64MV-NEXT: and a2, a2, a4
; RV64MV-NEXT: slli a4, a2, 31
; RV64MV-NEXT: srli a5, a3, 33
; RV64MV-NEXT: or a4, a5, a4
; RV64MV-NEXT: slli a4, a4, 31
; RV64MV-NEXT: srai a4, a4, 31
; RV64MV-NEXT: slli a2, a2, 29
; RV64MV-NEXT: srai a2, a2, 31
; RV64MV-NEXT: slli a3, a3, 31
; RV64MV-NEXT: srai a3, a3, 31
; RV64MV-NEXT: lui a5, 10923
; RV64MV-NEXT: addiw a5, a5, -1365
; RV64MV-NEXT: slli a5, a5, 12
@ -803,12 +798,12 @@ define void @test_srem_vec(<3 x i33>* %X) nounwind {
; RV64MV-NEXT: addi a5, a5, -1365
; RV64MV-NEXT: slli a5, a5, 12
; RV64MV-NEXT: addi a5, a5, -1365
; RV64MV-NEXT: mulh a5, a4, a5
; RV64MV-NEXT: mulh a5, a3, a5
; RV64MV-NEXT: srli a1, a5, 63
; RV64MV-NEXT: add a1, a5, a1
; RV64MV-NEXT: addi a5, zero, 6
; RV64MV-NEXT: mul a1, a1, a5
; RV64MV-NEXT: sub a1, a4, a1
; RV64MV-NEXT: sub a1, a3, a1
; RV64MV-NEXT: sd a1, 0(sp)
; RV64MV-NEXT: lui a1, 1035469
; RV64MV-NEXT: addiw a1, a1, -819
@ -819,11 +814,11 @@ define void @test_srem_vec(<3 x i33>* %X) nounwind {
; RV64MV-NEXT: slli a1, a1, 13
; RV64MV-NEXT: addi a1, a1, -1639
; RV64MV-NEXT: mulh a1, a2, a1
; RV64MV-NEXT: srli a4, a1, 63
; RV64MV-NEXT: srli a3, a1, 63
; RV64MV-NEXT: srai a1, a1, 1
; RV64MV-NEXT: add a1, a1, a4
; RV64MV-NEXT: slli a4, a1, 2
; RV64MV-NEXT: add a1, a4, a1
; RV64MV-NEXT: add a1, a1, a3
; RV64MV-NEXT: slli a3, a1, 2
; RV64MV-NEXT: add a1, a3, a1
; RV64MV-NEXT: add a1, a2, a1
; RV64MV-NEXT: sd a1, 16(sp)
; RV64MV-NEXT: lui a1, 18725
@ -834,21 +829,20 @@ define void @test_srem_vec(<3 x i33>* %X) nounwind {
; RV64MV-NEXT: addi a1, a1, -1755
; RV64MV-NEXT: slli a1, a1, 12
; RV64MV-NEXT: addi a1, a1, -1755
; RV64MV-NEXT: mulh a1, a3, a1
; RV64MV-NEXT: mulh a1, a4, a1
; RV64MV-NEXT: srli a2, a1, 63
; RV64MV-NEXT: srai a1, a1, 1
; RV64MV-NEXT: add a1, a1, a2
; RV64MV-NEXT: slli a2, a1, 3
; RV64MV-NEXT: sub a1, a1, a2
; RV64MV-NEXT: add a1, a3, a1
; RV64MV-NEXT: add a1, a4, a1
; RV64MV-NEXT: sd a1, 8(sp)
; RV64MV-NEXT: vsetivli a1, 4, e64,m2,ta,mu
; RV64MV-NEXT: vle64.v v26, (sp)
; RV64MV-NEXT: lui a1, %hi(.LCPI3_0)
; RV64MV-NEXT: addi a1, a1, %lo(.LCPI3_0)
; RV64MV-NEXT: vle64.v v28, (a1)
; RV64MV-NEXT: slli a1, a6, 33
; RV64MV-NEXT: addi a1, a1, -1
; RV64MV-NEXT: srli a1, a6, 31
; RV64MV-NEXT: vand.vx v26, v26, a1
; RV64MV-NEXT: vmsne.vv v0, v26, v28
; RV64MV-NEXT: vmv.v.i v26, 0

View File

@ -169,14 +169,13 @@ define <4 x i16> @fold_srem_vec_1(<4 x i16> %x) nounwind {
; RV64IM-NEXT: addi a5, zero, 95
; RV64IM-NEXT: mul a2, a2, a5
; RV64IM-NEXT: sub a1, a1, a2
; RV64IM-NEXT: lui a2, 248
; RV64IM-NEXT: lui a2, 777976
; RV64IM-NEXT: addiw a2, a2, -1057
; RV64IM-NEXT: slli a2, a2, 15
; RV64IM-NEXT: addi a2, a2, -1057
; RV64IM-NEXT: slli a2, a2, 15
; RV64IM-NEXT: addi a2, a2, -1057
; RV64IM-NEXT: slli a2, a2, 13
; RV64IM-NEXT: addi a2, a2, -265
; RV64IM-NEXT: slli a2, a2, 14
; RV64IM-NEXT: addi a2, a2, -529
; RV64IM-NEXT: srli a2, a2, 1
; RV64IM-NEXT: mulh a2, a4, a2
; RV64IM-NEXT: sub a2, a2, a4
; RV64IM-NEXT: srli a5, a2, 63

View File

@ -115,20 +115,22 @@ define i64 @func2(i64 %x, i64 %y) nounwind {
;
; RV64I-LABEL: func2:
; RV64I: # %bb.0:
; RV64I-NEXT: mv a2, a0
; RV64I-NEXT: sub a3, a0, a1
; RV64I-NEXT: addi a0, zero, -1
; RV64I-NEXT: slli a0, a0, 63
; RV64I-NEXT: bgez a3, .LBB1_2
; RV64I-NEXT: addi a2, zero, -1
; RV64I-NEXT: bltz a3, .LBB1_2
; RV64I-NEXT: # %bb.1:
; RV64I-NEXT: addi a0, a0, -1
; RV64I-NEXT: slli a2, a2, 63
; RV64I-NEXT: j .LBB1_3
; RV64I-NEXT: .LBB1_2:
; RV64I-NEXT: srli a2, a2, 1
; RV64I-NEXT: .LBB1_3:
; RV64I-NEXT: sgtz a1, a1
; RV64I-NEXT: slt a2, a3, a2
; RV64I-NEXT: bne a1, a2, .LBB1_4
; RV64I-NEXT: # %bb.3:
; RV64I-NEXT: mv a0, a3
; RV64I-NEXT: .LBB1_4:
; RV64I-NEXT: slt a0, a3, a0
; RV64I-NEXT: bne a1, a0, .LBB1_5
; RV64I-NEXT: # %bb.4:
; RV64I-NEXT: mv a2, a3
; RV64I-NEXT: .LBB1_5:
; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: ret
;
; RV32IZbbNOZbt-LABEL: func2:
@ -156,20 +158,22 @@ define i64 @func2(i64 %x, i64 %y) nounwind {
;
; RV64IZbbNOZbt-LABEL: func2:
; RV64IZbbNOZbt: # %bb.0:
; RV64IZbbNOZbt-NEXT: mv a2, a0
; RV64IZbbNOZbt-NEXT: sub a3, a0, a1
; RV64IZbbNOZbt-NEXT: addi a0, zero, -1
; RV64IZbbNOZbt-NEXT: slli a0, a0, 63
; RV64IZbbNOZbt-NEXT: bgez a3, .LBB1_2
; RV64IZbbNOZbt-NEXT: addi a2, zero, -1
; RV64IZbbNOZbt-NEXT: bltz a3, .LBB1_2
; RV64IZbbNOZbt-NEXT: # %bb.1:
; RV64IZbbNOZbt-NEXT: addi a0, a0, -1
; RV64IZbbNOZbt-NEXT: slli a2, a2, 63
; RV64IZbbNOZbt-NEXT: j .LBB1_3
; RV64IZbbNOZbt-NEXT: .LBB1_2:
; RV64IZbbNOZbt-NEXT: srli a2, a2, 1
; RV64IZbbNOZbt-NEXT: .LBB1_3:
; RV64IZbbNOZbt-NEXT: sgtz a1, a1
; RV64IZbbNOZbt-NEXT: slt a2, a3, a2
; RV64IZbbNOZbt-NEXT: bne a1, a2, .LBB1_4
; RV64IZbbNOZbt-NEXT: # %bb.3:
; RV64IZbbNOZbt-NEXT: mv a0, a3
; RV64IZbbNOZbt-NEXT: .LBB1_4:
; RV64IZbbNOZbt-NEXT: slt a0, a3, a0
; RV64IZbbNOZbt-NEXT: bne a1, a0, .LBB1_5
; RV64IZbbNOZbt-NEXT: # %bb.4:
; RV64IZbbNOZbt-NEXT: mv a2, a3
; RV64IZbbNOZbt-NEXT: .LBB1_5:
; RV64IZbbNOZbt-NEXT: mv a0, a2
; RV64IZbbNOZbt-NEXT: ret
;
; RV32IZbbZbt-LABEL: func2:
@ -199,9 +203,9 @@ define i64 @func2(i64 %x, i64 %y) nounwind {
; RV64IZbbZbt-NEXT: xor a0, a2, a0
; RV64IZbbZbt-NEXT: slti a2, a1, 0
; RV64IZbbZbt-NEXT: addi a3, zero, -1
; RV64IZbbZbt-NEXT: slli a3, a3, 63
; RV64IZbbZbt-NEXT: addi a4, a3, -1
; RV64IZbbZbt-NEXT: cmov a2, a2, a4, a3
; RV64IZbbZbt-NEXT: slli a4, a3, 63
; RV64IZbbZbt-NEXT: srli a3, a3, 1
; RV64IZbbZbt-NEXT: cmov a2, a2, a3, a4
; RV64IZbbZbt-NEXT: cmov a0, a0, a2, a1
; RV64IZbbZbt-NEXT: ret
%tmp = call i64 @llvm.ssub.sat.i64(i64 %x, i64 %y);

View File

@ -123,20 +123,22 @@ define i64 @func64(i64 %x, i64 %y, i64 %z) nounwind {
;
; RV64I-LABEL: func64:
; RV64I: # %bb.0:
; RV64I-NEXT: mv a1, a0
; RV64I-NEXT: sub a3, a0, a2
; RV64I-NEXT: addi a0, zero, -1
; RV64I-NEXT: slli a0, a0, 63
; RV64I-NEXT: bgez a3, .LBB1_2
; RV64I-NEXT: addi a1, zero, -1
; RV64I-NEXT: bltz a3, .LBB1_2
; RV64I-NEXT: # %bb.1:
; RV64I-NEXT: addi a0, a0, -1
; RV64I-NEXT: slli a1, a1, 63
; RV64I-NEXT: j .LBB1_3
; RV64I-NEXT: .LBB1_2:
; RV64I-NEXT: srli a1, a1, 1
; RV64I-NEXT: .LBB1_3:
; RV64I-NEXT: sgtz a2, a2
; RV64I-NEXT: slt a1, a3, a1
; RV64I-NEXT: bne a2, a1, .LBB1_4
; RV64I-NEXT: # %bb.3:
; RV64I-NEXT: mv a0, a3
; RV64I-NEXT: .LBB1_4:
; RV64I-NEXT: slt a0, a3, a0
; RV64I-NEXT: bne a2, a0, .LBB1_5
; RV64I-NEXT: # %bb.4:
; RV64I-NEXT: mv a1, a3
; RV64I-NEXT: .LBB1_5:
; RV64I-NEXT: mv a0, a1
; RV64I-NEXT: ret
;
; RV32IZbbNOZbt-LABEL: func64:
@ -164,20 +166,22 @@ define i64 @func64(i64 %x, i64 %y, i64 %z) nounwind {
;
; RV64IZbbNOZbt-LABEL: func64:
; RV64IZbbNOZbt: # %bb.0:
; RV64IZbbNOZbt-NEXT: mv a1, a0
; RV64IZbbNOZbt-NEXT: sub a3, a0, a2
; RV64IZbbNOZbt-NEXT: addi a0, zero, -1
; RV64IZbbNOZbt-NEXT: slli a0, a0, 63
; RV64IZbbNOZbt-NEXT: bgez a3, .LBB1_2
; RV64IZbbNOZbt-NEXT: addi a1, zero, -1
; RV64IZbbNOZbt-NEXT: bltz a3, .LBB1_2
; RV64IZbbNOZbt-NEXT: # %bb.1:
; RV64IZbbNOZbt-NEXT: addi a0, a0, -1
; RV64IZbbNOZbt-NEXT: slli a1, a1, 63
; RV64IZbbNOZbt-NEXT: j .LBB1_3
; RV64IZbbNOZbt-NEXT: .LBB1_2:
; RV64IZbbNOZbt-NEXT: srli a1, a1, 1
; RV64IZbbNOZbt-NEXT: .LBB1_3:
; RV64IZbbNOZbt-NEXT: sgtz a2, a2
; RV64IZbbNOZbt-NEXT: slt a1, a3, a1
; RV64IZbbNOZbt-NEXT: bne a2, a1, .LBB1_4
; RV64IZbbNOZbt-NEXT: # %bb.3:
; RV64IZbbNOZbt-NEXT: mv a0, a3
; RV64IZbbNOZbt-NEXT: .LBB1_4:
; RV64IZbbNOZbt-NEXT: slt a0, a3, a0
; RV64IZbbNOZbt-NEXT: bne a2, a0, .LBB1_5
; RV64IZbbNOZbt-NEXT: # %bb.4:
; RV64IZbbNOZbt-NEXT: mv a1, a3
; RV64IZbbNOZbt-NEXT: .LBB1_5:
; RV64IZbbNOZbt-NEXT: mv a0, a1
; RV64IZbbNOZbt-NEXT: ret
;
; RV32IZbbZbt-LABEL: func64:
@ -207,9 +211,9 @@ define i64 @func64(i64 %x, i64 %y, i64 %z) nounwind {
; RV64IZbbZbt-NEXT: xor a0, a1, a0
; RV64IZbbZbt-NEXT: slti a1, a2, 0
; RV64IZbbZbt-NEXT: addi a3, zero, -1
; RV64IZbbZbt-NEXT: slli a3, a3, 63
; RV64IZbbZbt-NEXT: addi a4, a3, -1
; RV64IZbbZbt-NEXT: cmov a1, a1, a4, a3
; RV64IZbbZbt-NEXT: slli a4, a3, 63
; RV64IZbbZbt-NEXT: srli a3, a3, 1
; RV64IZbbZbt-NEXT: cmov a1, a1, a3, a4
; RV64IZbbZbt-NEXT: cmov a0, a0, a1, a2
; RV64IZbbZbt-NEXT: ret
%a = mul i64 %y, %z

View File

@ -407,7 +407,6 @@ define void @test_urem_vec(<3 x i11>* %X) nounwind {
; RV64-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
; RV64-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
; RV64-NEXT: sd s4, 0(sp) # 8-byte Folded Spill
; RV64-NEXT: mv s0, a0
; RV64-NEXT: lbu a0, 4(a0)
; RV64-NEXT: lwu a1, 0(s0)
@ -430,8 +429,8 @@ define void @test_urem_vec(<3 x i11>* %X) nounwind {
; RV64-NEXT: call __muldi3@plt
; RV64-NEXT: addi a0, a0, -1638
; RV64-NEXT: andi a0, a0, 2047
; RV64-NEXT: addi s4, zero, 1
; RV64-NEXT: sltu s1, s4, a0
; RV64-NEXT: addi a1, zero, 1
; RV64-NEXT: sltu s1, a1, a0
; RV64-NEXT: addi a1, zero, 1463
; RV64-NEXT: mv a0, s2
; RV64-NEXT: call __muldi3@plt
@ -448,12 +447,11 @@ define void @test_urem_vec(<3 x i11>* %X) nounwind {
; RV64-NEXT: slli a1, s1, 22
; RV64-NEXT: sub a0, a0, a1
; RV64-NEXT: sw a0, 0(s0)
; RV64-NEXT: slli a1, s4, 33
; RV64-NEXT: addi a1, a1, -1
; RV64-NEXT: addi a1, zero, -1
; RV64-NEXT: srli a1, a1, 31
; RV64-NEXT: and a0, a0, a1
; RV64-NEXT: srli a0, a0, 32
; RV64-NEXT: sb a0, 4(s0)
; RV64-NEXT: ld s4, 0(sp) # 8-byte Folded Reload
; RV64-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
; RV64-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
; RV64-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
@ -531,12 +529,12 @@ define void @test_urem_vec(<3 x i11>* %X) nounwind {
; RV64M-NEXT: andi a3, a3, 2047
; RV64M-NEXT: addi a4, zero, 1
; RV64M-NEXT: sltu a3, a4, a3
; RV64M-NEXT: addi a5, zero, 1463
; RV64M-NEXT: mul a2, a2, a5
; RV64M-NEXT: addi a4, zero, 1463
; RV64M-NEXT: mul a2, a2, a4
; RV64M-NEXT: addi a2, a2, -1463
; RV64M-NEXT: andi a2, a2, 2047
; RV64M-NEXT: addi a5, zero, 292
; RV64M-NEXT: sltu a2, a5, a2
; RV64M-NEXT: addi a4, zero, 292
; RV64M-NEXT: sltu a2, a4, a2
; RV64M-NEXT: neg a1, a1
; RV64M-NEXT: neg a2, a2
; RV64M-NEXT: andi a1, a1, 2047
@ -546,8 +544,8 @@ define void @test_urem_vec(<3 x i11>* %X) nounwind {
; RV64M-NEXT: slli a2, a3, 22
; RV64M-NEXT: sub a1, a1, a2
; RV64M-NEXT: sw a1, 0(a0)
; RV64M-NEXT: slli a2, a4, 33
; RV64M-NEXT: addi a2, a2, -1
; RV64M-NEXT: addi a2, zero, -1
; RV64M-NEXT: srli a2, a2, 31
; RV64M-NEXT: and a1, a1, a2
; RV64M-NEXT: srli a1, a1, 32
; RV64M-NEXT: sb a1, 4(a0)
@ -649,9 +647,9 @@ define void @test_urem_vec(<3 x i11>* %X) nounwind {
; RV64MV-NEXT: addi a2, zero, 1
; RV64MV-NEXT: vmv1r.v v28, v27
; RV64MV-NEXT: vmv.s.x v28, a2
; RV64MV-NEXT: lui a3, %hi(.LCPI4_1)
; RV64MV-NEXT: addi a3, a3, %lo(.LCPI4_1)
; RV64MV-NEXT: vle16.v v29, (a3)
; RV64MV-NEXT: lui a2, %hi(.LCPI4_1)
; RV64MV-NEXT: addi a2, a2, %lo(.LCPI4_1)
; RV64MV-NEXT: vle16.v v29, (a2)
; RV64MV-NEXT: vsrl.vv v25, v25, v28
; RV64MV-NEXT: vor.vv v25, v25, v26
; RV64MV-NEXT: vand.vx v25, v25, a1
@ -659,19 +657,19 @@ define void @test_urem_vec(<3 x i11>* %X) nounwind {
; RV64MV-NEXT: vmerge.vim v25, v27, -1, v0
; RV64MV-NEXT: vmv.x.s a1, v25
; RV64MV-NEXT: andi a1, a1, 2047
; RV64MV-NEXT: vsetivli a3, 1, e16,m1,ta,mu
; RV64MV-NEXT: vsetivli a2, 1, e16,m1,ta,mu
; RV64MV-NEXT: vslidedown.vi v26, v25, 1
; RV64MV-NEXT: vmv.x.s a3, v26
; RV64MV-NEXT: andi a3, a3, 2047
; RV64MV-NEXT: slli a3, a3, 11
; RV64MV-NEXT: or a1, a1, a3
; RV64MV-NEXT: vmv.x.s a2, v26
; RV64MV-NEXT: andi a2, a2, 2047
; RV64MV-NEXT: slli a2, a2, 11
; RV64MV-NEXT: or a1, a1, a2
; RV64MV-NEXT: vslidedown.vi v25, v25, 2
; RV64MV-NEXT: vmv.x.s a3, v25
; RV64MV-NEXT: slli a3, a3, 22
; RV64MV-NEXT: or a1, a1, a3
; RV64MV-NEXT: vmv.x.s a2, v25
; RV64MV-NEXT: slli a2, a2, 22
; RV64MV-NEXT: or a1, a1, a2
; RV64MV-NEXT: sw a1, 0(a0)
; RV64MV-NEXT: slli a2, a2, 33
; RV64MV-NEXT: addi a2, a2, -1
; RV64MV-NEXT: addi a2, zero, -1
; RV64MV-NEXT: srli a2, a2, 31
; RV64MV-NEXT: and a1, a1, a2
; RV64MV-NEXT: srli a1, a1, 32
; RV64MV-NEXT: sb a1, 4(a0)

View File

@ -60,9 +60,8 @@ li x12, -0x80000000
# CHECK-EXPAND: c.li a2, 1
# CHECK-EXPAND: c.slli a2, 31
li x12, 0x80000000
# CHECK-EXPAND: c.li a2, 1
# CHECK-EXPAND: c.slli a2, 32
# CHECK-EXPAND: c.addi a2, -1
# CHECK-EXPAND: c.li a2, -1
# CHECK-EXPAND: c.srli a2, 32
li x12, 0xFFFFFFFF
# CHECK-EXPAND: c.li t0, 1

View File

@ -72,9 +72,8 @@ li x12, -0x80000000
# CHECK-EXPAND: addi a2, zero, 1
# CHECK-EXPAND: slli a2, a2, 31
li x12, 0x80000000
# CHECK-EXPAND: addi a2, zero, 1
# CHECK-EXPAND: slli a2, a2, 32
# CHECK-EXPAND: addi a2, a2, -1
# CHECK-EXPAND: addi a2, zero, -1
# CHECK-EXPAND: srli a2, a2, 32
li x12, 0xFFFFFFFF
# CHECK-EXPAND: addi t0, zero, 1
@ -119,11 +118,8 @@ li x5, 0x100004000
# CHECK-EXPAND: addiw t1, t1, 1
# CHECK-EXPAND: slli t1, t1, 32
li x6, 0x100100000000
# CHECK-EXPAND: addi t2, zero, 1
# CHECK-EXPAND: slli t2, t2, 36
# CHECK-EXPAND: addi t2, t2, -1
# CHECK-EXPAND: slli t2, t2, 12
# CHECK-EXPAND: addi t2, t2, 1
# CHECK-EXPAND: lui t2, 983072
# CHECK-EXPAND: srli t2, t2, 16
li x7, 0xFFFFFFFFF001
# CHECK-EXPAND: lui s0, 65536
# CHECK-EXPAND: addiw s0, s0, -1