[RISCV] Override TargetLowering::BuildSDIVPow2 to generate SELECT

When `Zbt` is enabled, we can generate a SELECT for signed division by a
power of 2, so that the comparison and the addition feeding the SELECT have
no data dependency on each other.

Reviewed By: craig.topper

Differential Revision: https://reviews.llvm.org/D114856
This commit is contained in:
wangpc 2022-01-11 15:52:00 +08:00
parent 50ec1306d0
commit 98d51c2542
3 changed files with 866 additions and 0 deletions

View File

@ -10350,6 +10350,60 @@ SDValue RISCVTargetLowering::joinRegisterPartsIntoValue(
return SDValue();
}
/// Lower a signed division by a (possibly negated) power of two into a
/// setcc / add / select / arithmetic-shift sequence when Zbt is enabled.
///
/// \param N        The division node; operand 0 is the dividend.
/// \param Divisor  The constant divisor. Asserted to be a power of two or a
///                 negated power of two.
/// \param DAG      The DAG in which the replacement nodes are built.
/// \param Created  Receives every intermediate node built here; the node
///                 that is returned is not appended.
/// \returns SDValue(N, 0) when isIntDivCheap() reports that division is
///          cheap, an empty SDValue when this lowering is declined, and
///          otherwise the root of the replacement sequence.
SDValue
RISCVTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
                                   SelectionDAG &DAG,
                                   SmallVectorImpl<SDNode *> &Created) const {
  // When division is cheap, hand the node back untouched so that the SDIV is
  // lowered as an SDIV.
  AttributeList FnAttrs =
      DAG.getMachineFunction().getFunction().getAttributes();
  if (isIntDivCheap(N->getValueType(0), FnAttrs))
    return SDValue(N, 0);

  assert((Divisor.isPowerOf2() || Divisor.isNegatedPowerOf2()) &&
         "Unexpected divisor!");

  // A conditional move is required, so bail out unless Zbt is enabled.
  if (!Subtarget.hasStdExtZbt())
    return SDValue();

  // Dividing by 2 would put more instructions on the critical path, and
  // |Divisor| >= 2^12 is not profitable; keep the original DAG for both.
  const unsigned ShiftAmt = Divisor.countTrailingZeros();
  if (ShiftAmt == 1 || ShiftAmt >= 12)
    return SDValue();

  // Only i32, and i64 on a 64-bit subtarget, are handled.
  EVT VT = N->getValueType(0);
  const bool IsHandledVT =
      VT == MVT::i32 || (VT == MVT::i64 && Subtarget.is64Bit());
  if (!IsHandledVT)
    return SDValue();

  SDLoc DL(N);
  SDValue Dividend = N->getOperand(0);
  SDValue Zero = DAG.getConstant(0, DL, VT);
  SDValue Bias = DAG.getConstant((1ULL << ShiftAmt) - 1, DL, VT);

  // Pick Dividend + (Pow2 - 1) for a negative dividend, and the dividend
  // itself otherwise. Each node is recorded in Created as it is built.
  SDValue IsNeg = DAG.getSetCC(DL, VT, Dividend, Zero, ISD::SETLT);
  Created.push_back(IsNeg.getNode());
  SDValue Biased = DAG.getNode(ISD::ADD, DL, VT, Dividend, Bias);
  Created.push_back(Biased.getNode());
  SDValue Adjusted = DAG.getNode(ISD::SELECT, DL, VT, IsNeg, Biased, Dividend);
  Created.push_back(Adjusted.getNode());

  // Divide by pow2 with an arithmetic shift right.
  SDValue ShiftAmtNode = DAG.getConstant(ShiftAmt, DL, VT);
  SDValue Quotient = DAG.getNode(ISD::SRA, DL, VT, Adjusted, ShiftAmtNode);

  // A non-negative divisor needs nothing further.
  if (Divisor.isNonNegative())
    return Quotient;

  // Negative divisor: the shift becomes an intermediate node and the result
  // is its negation, built as 0 - Quotient.
  Created.push_back(Quotient.getNode());
  return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Quotient);
}
#define GET_REGISTER_MATCHER
#include "RISCVGenAsmMatcher.inc"

View File

@ -528,6 +528,9 @@ public:
bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override;
/// Target override of the TargetLowering hook that builds the replacement
/// for a signed division of \p N by the constant \p Divisor. Nodes built
/// along the way are appended to \p Created.
SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
SmallVectorImpl<SDNode *> &Created) const override;
private:
/// RISCVCCAssignFn - This target-specific function extends the default
/// CCValAssign with additional information used to lower RISC-V calling

View File

@ -0,0 +1,809 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
; RUN: | FileCheck %s -check-prefix=RV32I
; RUN: llc -mtriple=riscv32 -mattr=+experimental-zbt -verify-machineinstrs < %s \
; RUN: | FileCheck %s -check-prefix=RV32ZBT
; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
; RUN: | FileCheck %s -check-prefix=RV64I
; RUN: llc -mtriple=riscv64 -mattr=+experimental-zbt -verify-machineinstrs < %s \
; RUN: | FileCheck %s -check-prefix=RV64ZBT
; i32 sdiv by 2 (shift amount 1). The expected code is the same with and
; without Zbt; no cmov is emitted.
define i32 @sdiv32_pow2_2(i32 %a) {
; RV32I-LABEL: sdiv32_pow2_2:
; RV32I: # %bb.0: # %entry
; RV32I-NEXT: srli a1, a0, 31
; RV32I-NEXT: add a0, a0, a1
; RV32I-NEXT: srai a0, a0, 1
; RV32I-NEXT: ret
;
; RV32ZBT-LABEL: sdiv32_pow2_2:
; RV32ZBT: # %bb.0: # %entry
; RV32ZBT-NEXT: srli a1, a0, 31
; RV32ZBT-NEXT: add a0, a0, a1
; RV32ZBT-NEXT: srai a0, a0, 1
; RV32ZBT-NEXT: ret
;
; RV64I-LABEL: sdiv32_pow2_2:
; RV64I: # %bb.0: # %entry
; RV64I-NEXT: srliw a1, a0, 31
; RV64I-NEXT: addw a0, a0, a1
; RV64I-NEXT: sraiw a0, a0, 1
; RV64I-NEXT: ret
;
; RV64ZBT-LABEL: sdiv32_pow2_2:
; RV64ZBT: # %bb.0: # %entry
; RV64ZBT-NEXT: srliw a1, a0, 31
; RV64ZBT-NEXT: addw a0, a0, a1
; RV64ZBT-NEXT: sraiw a0, a0, 1
; RV64ZBT-NEXT: ret
entry:
%div = sdiv i32 %a, 2
ret i32 %div
}
; i32 sdiv by -2. Every configuration expects the divide-by-2 sequence
; followed by a neg; Zbt makes no difference.
define i32 @sdiv32_pow2_negative_2(i32 %a) {
; RV32I-LABEL: sdiv32_pow2_negative_2:
; RV32I: # %bb.0: # %entry
; RV32I-NEXT: srli a1, a0, 31
; RV32I-NEXT: add a0, a0, a1
; RV32I-NEXT: srai a0, a0, 1
; RV32I-NEXT: neg a0, a0
; RV32I-NEXT: ret
;
; RV32ZBT-LABEL: sdiv32_pow2_negative_2:
; RV32ZBT: # %bb.0: # %entry
; RV32ZBT-NEXT: srli a1, a0, 31
; RV32ZBT-NEXT: add a0, a0, a1
; RV32ZBT-NEXT: srai a0, a0, 1
; RV32ZBT-NEXT: neg a0, a0
; RV32ZBT-NEXT: ret
;
; RV64I-LABEL: sdiv32_pow2_negative_2:
; RV64I: # %bb.0: # %entry
; RV64I-NEXT: srliw a1, a0, 31
; RV64I-NEXT: addw a0, a0, a1
; RV64I-NEXT: sraiw a0, a0, 1
; RV64I-NEXT: neg a0, a0
; RV64I-NEXT: ret
;
; RV64ZBT-LABEL: sdiv32_pow2_negative_2:
; RV64ZBT: # %bb.0: # %entry
; RV64ZBT-NEXT: srliw a1, a0, 31
; RV64ZBT-NEXT: addw a0, a0, a1
; RV64ZBT-NEXT: sraiw a0, a0, 1
; RV64ZBT-NEXT: neg a0, a0
; RV64ZBT-NEXT: ret
entry:
%div = sdiv i32 %a, -2
ret i32 %div
}
; i32 sdiv by 2048 (shift amount 11). With Zbt the expected code uses cmov to
; pick between a0 and a0 + 2047 before the arithmetic shift; without Zbt it
; is the srai/srli/add/srai sequence.
define i32 @sdiv32_pow2_2048(i32 %a) {
; RV32I-LABEL: sdiv32_pow2_2048:
; RV32I: # %bb.0: # %entry
; RV32I-NEXT: srai a1, a0, 31
; RV32I-NEXT: srli a1, a1, 21
; RV32I-NEXT: add a0, a0, a1
; RV32I-NEXT: srai a0, a0, 11
; RV32I-NEXT: ret
;
; RV32ZBT-LABEL: sdiv32_pow2_2048:
; RV32ZBT: # %bb.0: # %entry
; RV32ZBT-NEXT: slti a1, a0, 0
; RV32ZBT-NEXT: addi a2, a0, 2047
; RV32ZBT-NEXT: cmov a0, a1, a2, a0
; RV32ZBT-NEXT: srai a0, a0, 11
; RV32ZBT-NEXT: ret
;
; RV64I-LABEL: sdiv32_pow2_2048:
; RV64I: # %bb.0: # %entry
; RV64I-NEXT: sraiw a1, a0, 31
; RV64I-NEXT: srliw a1, a1, 21
; RV64I-NEXT: addw a0, a0, a1
; RV64I-NEXT: sraiw a0, a0, 11
; RV64I-NEXT: ret
;
; RV64ZBT-LABEL: sdiv32_pow2_2048:
; RV64ZBT: # %bb.0: # %entry
; RV64ZBT-NEXT: sext.w a1, a0
; RV64ZBT-NEXT: addi a2, a0, 2047
; RV64ZBT-NEXT: slti a1, a1, 0
; RV64ZBT-NEXT: cmov a0, a1, a2, a0
; RV64ZBT-NEXT: sraiw a0, a0, 11
; RV64ZBT-NEXT: ret
entry:
%div = sdiv i32 %a, 2048
ret i32 %div
}
; i32 sdiv by -2048. With Zbt the expected code is the cmov-based
; divide-by-2048 sequence followed by a neg; without Zbt it is the
; shift-and-add sequence followed by a neg.
define i32 @sdiv32_pow2_negative_2048(i32 %a) {
; RV32I-LABEL: sdiv32_pow2_negative_2048:
; RV32I: # %bb.0: # %entry
; RV32I-NEXT: srai a1, a0, 31
; RV32I-NEXT: srli a1, a1, 21
; RV32I-NEXT: add a0, a0, a1
; RV32I-NEXT: srai a0, a0, 11
; RV32I-NEXT: neg a0, a0
; RV32I-NEXT: ret
;
; RV32ZBT-LABEL: sdiv32_pow2_negative_2048:
; RV32ZBT: # %bb.0: # %entry
; RV32ZBT-NEXT: slti a1, a0, 0
; RV32ZBT-NEXT: addi a2, a0, 2047
; RV32ZBT-NEXT: cmov a0, a1, a2, a0
; RV32ZBT-NEXT: srai a0, a0, 11
; RV32ZBT-NEXT: neg a0, a0
; RV32ZBT-NEXT: ret
;
; RV64I-LABEL: sdiv32_pow2_negative_2048:
; RV64I: # %bb.0: # %entry
; RV64I-NEXT: sraiw a1, a0, 31
; RV64I-NEXT: srliw a1, a1, 21
; RV64I-NEXT: addw a0, a0, a1
; RV64I-NEXT: sraiw a0, a0, 11
; RV64I-NEXT: neg a0, a0
; RV64I-NEXT: ret
;
; RV64ZBT-LABEL: sdiv32_pow2_negative_2048:
; RV64ZBT: # %bb.0: # %entry
; RV64ZBT-NEXT: sext.w a1, a0
; RV64ZBT-NEXT: addi a2, a0, 2047
; RV64ZBT-NEXT: slti a1, a1, 0
; RV64ZBT-NEXT: cmov a0, a1, a2, a0
; RV64ZBT-NEXT: sraiw a0, a0, 11
; RV64ZBT-NEXT: neg a0, a0
; RV64ZBT-NEXT: ret
entry:
%div = sdiv i32 %a, -2048
ret i32 %div
}
; i32 sdiv by 4096 (shift amount 12). The expected code is the same with and
; without Zbt; no cmov is emitted.
define i32 @sdiv32_pow2_4096(i32 %a) {
; RV32I-LABEL: sdiv32_pow2_4096:
; RV32I: # %bb.0: # %entry
; RV32I-NEXT: srai a1, a0, 31
; RV32I-NEXT: srli a1, a1, 20
; RV32I-NEXT: add a0, a0, a1
; RV32I-NEXT: srai a0, a0, 12
; RV32I-NEXT: ret
;
; RV32ZBT-LABEL: sdiv32_pow2_4096:
; RV32ZBT: # %bb.0: # %entry
; RV32ZBT-NEXT: srai a1, a0, 31
; RV32ZBT-NEXT: srli a1, a1, 20
; RV32ZBT-NEXT: add a0, a0, a1
; RV32ZBT-NEXT: srai a0, a0, 12
; RV32ZBT-NEXT: ret
;
; RV64I-LABEL: sdiv32_pow2_4096:
; RV64I: # %bb.0: # %entry
; RV64I-NEXT: sraiw a1, a0, 31
; RV64I-NEXT: srliw a1, a1, 20
; RV64I-NEXT: addw a0, a0, a1
; RV64I-NEXT: sraiw a0, a0, 12
; RV64I-NEXT: ret
;
; RV64ZBT-LABEL: sdiv32_pow2_4096:
; RV64ZBT: # %bb.0: # %entry
; RV64ZBT-NEXT: sraiw a1, a0, 31
; RV64ZBT-NEXT: srliw a1, a1, 20
; RV64ZBT-NEXT: addw a0, a0, a1
; RV64ZBT-NEXT: sraiw a0, a0, 12
; RV64ZBT-NEXT: ret
entry:
%div = sdiv i32 %a, 4096
ret i32 %div
}
; i32 sdiv by -4096. Every configuration expects the divide-by-4096 sequence
; followed by a neg; Zbt makes no difference.
define i32 @sdiv32_pow2_negative_4096(i32 %a) {
; RV32I-LABEL: sdiv32_pow2_negative_4096:
; RV32I: # %bb.0: # %entry
; RV32I-NEXT: srai a1, a0, 31
; RV32I-NEXT: srli a1, a1, 20
; RV32I-NEXT: add a0, a0, a1
; RV32I-NEXT: srai a0, a0, 12
; RV32I-NEXT: neg a0, a0
; RV32I-NEXT: ret
;
; RV32ZBT-LABEL: sdiv32_pow2_negative_4096:
; RV32ZBT: # %bb.0: # %entry
; RV32ZBT-NEXT: srai a1, a0, 31
; RV32ZBT-NEXT: srli a1, a1, 20
; RV32ZBT-NEXT: add a0, a0, a1
; RV32ZBT-NEXT: srai a0, a0, 12
; RV32ZBT-NEXT: neg a0, a0
; RV32ZBT-NEXT: ret
;
; RV64I-LABEL: sdiv32_pow2_negative_4096:
; RV64I: # %bb.0: # %entry
; RV64I-NEXT: sraiw a1, a0, 31
; RV64I-NEXT: srliw a1, a1, 20
; RV64I-NEXT: addw a0, a0, a1
; RV64I-NEXT: sraiw a0, a0, 12
; RV64I-NEXT: neg a0, a0
; RV64I-NEXT: ret
;
; RV64ZBT-LABEL: sdiv32_pow2_negative_4096:
; RV64ZBT: # %bb.0: # %entry
; RV64ZBT-NEXT: sraiw a1, a0, 31
; RV64ZBT-NEXT: srliw a1, a1, 20
; RV64ZBT-NEXT: addw a0, a0, a1
; RV64ZBT-NEXT: sraiw a0, a0, 12
; RV64ZBT-NEXT: neg a0, a0
; RV64ZBT-NEXT: ret
entry:
%div = sdiv i32 %a, -4096
ret i32 %div
}
; i32 sdiv by 65536 (shift amount 16). The expected code is the same with and
; without Zbt; no cmov is emitted.
define i32 @sdiv32_pow2_65536(i32 %a) {
; RV32I-LABEL: sdiv32_pow2_65536:
; RV32I: # %bb.0: # %entry
; RV32I-NEXT: srai a1, a0, 31
; RV32I-NEXT: srli a1, a1, 16
; RV32I-NEXT: add a0, a0, a1
; RV32I-NEXT: srai a0, a0, 16
; RV32I-NEXT: ret
;
; RV32ZBT-LABEL: sdiv32_pow2_65536:
; RV32ZBT: # %bb.0: # %entry
; RV32ZBT-NEXT: srai a1, a0, 31
; RV32ZBT-NEXT: srli a1, a1, 16
; RV32ZBT-NEXT: add a0, a0, a1
; RV32ZBT-NEXT: srai a0, a0, 16
; RV32ZBT-NEXT: ret
;
; RV64I-LABEL: sdiv32_pow2_65536:
; RV64I: # %bb.0: # %entry
; RV64I-NEXT: sraiw a1, a0, 31
; RV64I-NEXT: srliw a1, a1, 16
; RV64I-NEXT: addw a0, a0, a1
; RV64I-NEXT: sraiw a0, a0, 16
; RV64I-NEXT: ret
;
; RV64ZBT-LABEL: sdiv32_pow2_65536:
; RV64ZBT: # %bb.0: # %entry
; RV64ZBT-NEXT: sraiw a1, a0, 31
; RV64ZBT-NEXT: srliw a1, a1, 16
; RV64ZBT-NEXT: addw a0, a0, a1
; RV64ZBT-NEXT: sraiw a0, a0, 16
; RV64ZBT-NEXT: ret
entry:
%div = sdiv i32 %a, 65536
ret i32 %div
}
; i32 sdiv by -65536. Every configuration expects the divide-by-65536
; sequence followed by a neg; Zbt makes no difference.
define i32 @sdiv32_pow2_negative_65536(i32 %a) {
; RV32I-LABEL: sdiv32_pow2_negative_65536:
; RV32I: # %bb.0: # %entry
; RV32I-NEXT: srai a1, a0, 31
; RV32I-NEXT: srli a1, a1, 16
; RV32I-NEXT: add a0, a0, a1
; RV32I-NEXT: srai a0, a0, 16
; RV32I-NEXT: neg a0, a0
; RV32I-NEXT: ret
;
; RV32ZBT-LABEL: sdiv32_pow2_negative_65536:
; RV32ZBT: # %bb.0: # %entry
; RV32ZBT-NEXT: srai a1, a0, 31
; RV32ZBT-NEXT: srli a1, a1, 16
; RV32ZBT-NEXT: add a0, a0, a1
; RV32ZBT-NEXT: srai a0, a0, 16
; RV32ZBT-NEXT: neg a0, a0
; RV32ZBT-NEXT: ret
;
; RV64I-LABEL: sdiv32_pow2_negative_65536:
; RV64I: # %bb.0: # %entry
; RV64I-NEXT: sraiw a1, a0, 31
; RV64I-NEXT: srliw a1, a1, 16
; RV64I-NEXT: addw a0, a0, a1
; RV64I-NEXT: sraiw a0, a0, 16
; RV64I-NEXT: neg a0, a0
; RV64I-NEXT: ret
;
; RV64ZBT-LABEL: sdiv32_pow2_negative_65536:
; RV64ZBT: # %bb.0: # %entry
; RV64ZBT-NEXT: sraiw a1, a0, 31
; RV64ZBT-NEXT: srliw a1, a1, 16
; RV64ZBT-NEXT: addw a0, a0, a1
; RV64ZBT-NEXT: sraiw a0, a0, 16
; RV64ZBT-NEXT: neg a0, a0
; RV64ZBT-NEXT: ret
entry:
%div = sdiv i32 %a, -65536
ret i32 %div
}
; i64 sdiv by 2 (shift amount 1). No configuration expects a cmov. On riscv32
; the Zbt output differs from the base output only by using fsri in place of
; the srli/slli/or sequence.
define i64 @sdiv64_pow2_2(i64 %a) {
; RV32I-LABEL: sdiv64_pow2_2:
; RV32I: # %bb.0: # %entry
; RV32I-NEXT: srli a2, a1, 31
; RV32I-NEXT: add a2, a0, a2
; RV32I-NEXT: srli a3, a2, 1
; RV32I-NEXT: sltu a0, a2, a0
; RV32I-NEXT: add a1, a1, a0
; RV32I-NEXT: slli a0, a1, 31
; RV32I-NEXT: or a0, a3, a0
; RV32I-NEXT: srai a1, a1, 1
; RV32I-NEXT: ret
;
; RV32ZBT-LABEL: sdiv64_pow2_2:
; RV32ZBT: # %bb.0: # %entry
; RV32ZBT-NEXT: srli a2, a1, 31
; RV32ZBT-NEXT: add a2, a0, a2
; RV32ZBT-NEXT: sltu a0, a2, a0
; RV32ZBT-NEXT: add a1, a1, a0
; RV32ZBT-NEXT: fsri a0, a2, a1, 1
; RV32ZBT-NEXT: srai a1, a1, 1
; RV32ZBT-NEXT: ret
;
; RV64I-LABEL: sdiv64_pow2_2:
; RV64I: # %bb.0: # %entry
; RV64I-NEXT: srli a1, a0, 63
; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: srai a0, a0, 1
; RV64I-NEXT: ret
;
; RV64ZBT-LABEL: sdiv64_pow2_2:
; RV64ZBT: # %bb.0: # %entry
; RV64ZBT-NEXT: srli a1, a0, 63
; RV64ZBT-NEXT: add a0, a0, a1
; RV64ZBT-NEXT: srai a0, a0, 1
; RV64ZBT-NEXT: ret
entry:
%div = sdiv i64 %a, 2
ret i64 %div
}
; i64 sdiv by -2. No configuration expects a cmov; the divide-by-2 sequence
; is followed by a negation (a neg/snez/add/neg sequence on riscv32, a single
; neg on riscv64).
define i64 @sdiv64_pow2_negative_2(i64 %a) {
; RV32I-LABEL: sdiv64_pow2_negative_2:
; RV32I: # %bb.0: # %entry
; RV32I-NEXT: srli a2, a1, 31
; RV32I-NEXT: add a2, a0, a2
; RV32I-NEXT: srli a3, a2, 1
; RV32I-NEXT: sltu a0, a2, a0
; RV32I-NEXT: add a1, a1, a0
; RV32I-NEXT: slli a0, a1, 31
; RV32I-NEXT: or a2, a3, a0
; RV32I-NEXT: neg a0, a2
; RV32I-NEXT: snez a2, a2
; RV32I-NEXT: srai a1, a1, 1
; RV32I-NEXT: add a1, a1, a2
; RV32I-NEXT: neg a1, a1
; RV32I-NEXT: ret
;
; RV32ZBT-LABEL: sdiv64_pow2_negative_2:
; RV32ZBT: # %bb.0: # %entry
; RV32ZBT-NEXT: srli a2, a1, 31
; RV32ZBT-NEXT: add a2, a0, a2
; RV32ZBT-NEXT: sltu a0, a2, a0
; RV32ZBT-NEXT: add a1, a1, a0
; RV32ZBT-NEXT: fsri a2, a2, a1, 1
; RV32ZBT-NEXT: neg a0, a2
; RV32ZBT-NEXT: snez a2, a2
; RV32ZBT-NEXT: srai a1, a1, 1
; RV32ZBT-NEXT: add a1, a1, a2
; RV32ZBT-NEXT: neg a1, a1
; RV32ZBT-NEXT: ret
;
; RV64I-LABEL: sdiv64_pow2_negative_2:
; RV64I: # %bb.0: # %entry
; RV64I-NEXT: srli a1, a0, 63
; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: srai a0, a0, 1
; RV64I-NEXT: neg a0, a0
; RV64I-NEXT: ret
;
; RV64ZBT-LABEL: sdiv64_pow2_negative_2:
; RV64ZBT: # %bb.0: # %entry
; RV64ZBT-NEXT: srli a1, a0, 63
; RV64ZBT-NEXT: add a0, a0, a1
; RV64ZBT-NEXT: srai a0, a0, 1
; RV64ZBT-NEXT: neg a0, a0
; RV64ZBT-NEXT: ret
entry:
%div = sdiv i64 %a, -2
ret i64 %div
}
; i64 sdiv by 2048 (shift amount 11). Only riscv64 with Zbt expects the
; slti/addi/cmov/srai sequence. On riscv32 no cmov is expected; the Zbt
; output only swaps the srli/slli/or sequence for fsri.
define i64 @sdiv64_pow2_2048(i64 %a) {
; RV32I-LABEL: sdiv64_pow2_2048:
; RV32I: # %bb.0: # %entry
; RV32I-NEXT: srai a2, a1, 31
; RV32I-NEXT: srli a2, a2, 21
; RV32I-NEXT: add a2, a0, a2
; RV32I-NEXT: srli a3, a2, 11
; RV32I-NEXT: sltu a0, a2, a0
; RV32I-NEXT: add a1, a1, a0
; RV32I-NEXT: slli a0, a1, 21
; RV32I-NEXT: or a0, a3, a0
; RV32I-NEXT: srai a1, a1, 11
; RV32I-NEXT: ret
;
; RV32ZBT-LABEL: sdiv64_pow2_2048:
; RV32ZBT: # %bb.0: # %entry
; RV32ZBT-NEXT: srai a2, a1, 31
; RV32ZBT-NEXT: srli a2, a2, 21
; RV32ZBT-NEXT: add a2, a0, a2
; RV32ZBT-NEXT: sltu a0, a2, a0
; RV32ZBT-NEXT: add a1, a1, a0
; RV32ZBT-NEXT: fsri a0, a2, a1, 11
; RV32ZBT-NEXT: srai a1, a1, 11
; RV32ZBT-NEXT: ret
;
; RV64I-LABEL: sdiv64_pow2_2048:
; RV64I: # %bb.0: # %entry
; RV64I-NEXT: srai a1, a0, 63
; RV64I-NEXT: srli a1, a1, 53
; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: srai a0, a0, 11
; RV64I-NEXT: ret
;
; RV64ZBT-LABEL: sdiv64_pow2_2048:
; RV64ZBT: # %bb.0: # %entry
; RV64ZBT-NEXT: slti a1, a0, 0
; RV64ZBT-NEXT: addi a2, a0, 2047
; RV64ZBT-NEXT: cmov a0, a1, a2, a0
; RV64ZBT-NEXT: srai a0, a0, 11
; RV64ZBT-NEXT: ret
entry:
%div = sdiv i64 %a, 2048
ret i64 %div
}
; i64 sdiv by -2048. Only riscv64 with Zbt expects the cmov-based sequence,
; followed by a neg. On riscv32 no cmov is expected with or without Zbt.
define i64 @sdiv64_pow2_negative_2048(i64 %a) {
; RV32I-LABEL: sdiv64_pow2_negative_2048:
; RV32I: # %bb.0: # %entry
; RV32I-NEXT: srai a2, a1, 31
; RV32I-NEXT: srli a2, a2, 21
; RV32I-NEXT: add a2, a0, a2
; RV32I-NEXT: srli a3, a2, 11
; RV32I-NEXT: sltu a0, a2, a0
; RV32I-NEXT: add a1, a1, a0
; RV32I-NEXT: slli a0, a1, 21
; RV32I-NEXT: or a2, a3, a0
; RV32I-NEXT: neg a0, a2
; RV32I-NEXT: snez a2, a2
; RV32I-NEXT: srai a1, a1, 11
; RV32I-NEXT: add a1, a1, a2
; RV32I-NEXT: neg a1, a1
; RV32I-NEXT: ret
;
; RV32ZBT-LABEL: sdiv64_pow2_negative_2048:
; RV32ZBT: # %bb.0: # %entry
; RV32ZBT-NEXT: srai a2, a1, 31
; RV32ZBT-NEXT: srli a2, a2, 21
; RV32ZBT-NEXT: add a2, a0, a2
; RV32ZBT-NEXT: sltu a0, a2, a0
; RV32ZBT-NEXT: add a1, a1, a0
; RV32ZBT-NEXT: fsri a2, a2, a1, 11
; RV32ZBT-NEXT: neg a0, a2
; RV32ZBT-NEXT: snez a2, a2
; RV32ZBT-NEXT: srai a1, a1, 11
; RV32ZBT-NEXT: add a1, a1, a2
; RV32ZBT-NEXT: neg a1, a1
; RV32ZBT-NEXT: ret
;
; RV64I-LABEL: sdiv64_pow2_negative_2048:
; RV64I: # %bb.0: # %entry
; RV64I-NEXT: srai a1, a0, 63
; RV64I-NEXT: srli a1, a1, 53
; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: srai a0, a0, 11
; RV64I-NEXT: neg a0, a0
; RV64I-NEXT: ret
;
; RV64ZBT-LABEL: sdiv64_pow2_negative_2048:
; RV64ZBT: # %bb.0: # %entry
; RV64ZBT-NEXT: slti a1, a0, 0
; RV64ZBT-NEXT: addi a2, a0, 2047
; RV64ZBT-NEXT: cmov a0, a1, a2, a0
; RV64ZBT-NEXT: srai a0, a0, 11
; RV64ZBT-NEXT: neg a0, a0
; RV64ZBT-NEXT: ret
entry:
%div = sdiv i64 %a, -2048
ret i64 %div
}
; i64 sdiv by 4096 (shift amount 12). No configuration expects a cmov; on
; riscv64 the output is the same with and without Zbt.
define i64 @sdiv64_pow2_4096(i64 %a) {
; RV32I-LABEL: sdiv64_pow2_4096:
; RV32I: # %bb.0: # %entry
; RV32I-NEXT: srai a2, a1, 31
; RV32I-NEXT: srli a2, a2, 20
; RV32I-NEXT: add a2, a0, a2
; RV32I-NEXT: srli a3, a2, 12
; RV32I-NEXT: sltu a0, a2, a0
; RV32I-NEXT: add a1, a1, a0
; RV32I-NEXT: slli a0, a1, 20
; RV32I-NEXT: or a0, a3, a0
; RV32I-NEXT: srai a1, a1, 12
; RV32I-NEXT: ret
;
; RV32ZBT-LABEL: sdiv64_pow2_4096:
; RV32ZBT: # %bb.0: # %entry
; RV32ZBT-NEXT: srai a2, a1, 31
; RV32ZBT-NEXT: srli a2, a2, 20
; RV32ZBT-NEXT: add a2, a0, a2
; RV32ZBT-NEXT: sltu a0, a2, a0
; RV32ZBT-NEXT: add a1, a1, a0
; RV32ZBT-NEXT: fsri a0, a2, a1, 12
; RV32ZBT-NEXT: srai a1, a1, 12
; RV32ZBT-NEXT: ret
;
; RV64I-LABEL: sdiv64_pow2_4096:
; RV64I: # %bb.0: # %entry
; RV64I-NEXT: srai a1, a0, 63
; RV64I-NEXT: srli a1, a1, 52
; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: srai a0, a0, 12
; RV64I-NEXT: ret
;
; RV64ZBT-LABEL: sdiv64_pow2_4096:
; RV64ZBT: # %bb.0: # %entry
; RV64ZBT-NEXT: srai a1, a0, 63
; RV64ZBT-NEXT: srli a1, a1, 52
; RV64ZBT-NEXT: add a0, a0, a1
; RV64ZBT-NEXT: srai a0, a0, 12
; RV64ZBT-NEXT: ret
entry:
%div = sdiv i64 %a, 4096
ret i64 %div
}
; i64 sdiv by -4096. No configuration expects a cmov; the divide-by-4096
; sequence is followed by a negation.
define i64 @sdiv64_pow2_negative_4096(i64 %a) {
; RV32I-LABEL: sdiv64_pow2_negative_4096:
; RV32I: # %bb.0: # %entry
; RV32I-NEXT: srai a2, a1, 31
; RV32I-NEXT: srli a2, a2, 20
; RV32I-NEXT: add a2, a0, a2
; RV32I-NEXT: srli a3, a2, 12
; RV32I-NEXT: sltu a0, a2, a0
; RV32I-NEXT: add a1, a1, a0
; RV32I-NEXT: slli a0, a1, 20
; RV32I-NEXT: or a2, a3, a0
; RV32I-NEXT: neg a0, a2
; RV32I-NEXT: snez a2, a2
; RV32I-NEXT: srai a1, a1, 12
; RV32I-NEXT: add a1, a1, a2
; RV32I-NEXT: neg a1, a1
; RV32I-NEXT: ret
;
; RV32ZBT-LABEL: sdiv64_pow2_negative_4096:
; RV32ZBT: # %bb.0: # %entry
; RV32ZBT-NEXT: srai a2, a1, 31
; RV32ZBT-NEXT: srli a2, a2, 20
; RV32ZBT-NEXT: add a2, a0, a2
; RV32ZBT-NEXT: sltu a0, a2, a0
; RV32ZBT-NEXT: add a1, a1, a0
; RV32ZBT-NEXT: fsri a2, a2, a1, 12
; RV32ZBT-NEXT: neg a0, a2
; RV32ZBT-NEXT: snez a2, a2
; RV32ZBT-NEXT: srai a1, a1, 12
; RV32ZBT-NEXT: add a1, a1, a2
; RV32ZBT-NEXT: neg a1, a1
; RV32ZBT-NEXT: ret
;
; RV64I-LABEL: sdiv64_pow2_negative_4096:
; RV64I: # %bb.0: # %entry
; RV64I-NEXT: srai a1, a0, 63
; RV64I-NEXT: srli a1, a1, 52
; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: srai a0, a0, 12
; RV64I-NEXT: neg a0, a0
; RV64I-NEXT: ret
;
; RV64ZBT-LABEL: sdiv64_pow2_negative_4096:
; RV64ZBT: # %bb.0: # %entry
; RV64ZBT-NEXT: srai a1, a0, 63
; RV64ZBT-NEXT: srli a1, a1, 52
; RV64ZBT-NEXT: add a0, a0, a1
; RV64ZBT-NEXT: srai a0, a0, 12
; RV64ZBT-NEXT: neg a0, a0
; RV64ZBT-NEXT: ret
entry:
%div = sdiv i64 %a, -4096
ret i64 %div
}
; i64 sdiv by 65536 (shift amount 16). No configuration expects a cmov; on
; riscv64 the output is the same with and without Zbt.
define i64 @sdiv64_pow2_65536(i64 %a) {
; RV32I-LABEL: sdiv64_pow2_65536:
; RV32I: # %bb.0: # %entry
; RV32I-NEXT: srai a2, a1, 31
; RV32I-NEXT: srli a2, a2, 16
; RV32I-NEXT: add a2, a0, a2
; RV32I-NEXT: srli a3, a2, 16
; RV32I-NEXT: sltu a0, a2, a0
; RV32I-NEXT: add a1, a1, a0
; RV32I-NEXT: slli a0, a1, 16
; RV32I-NEXT: or a0, a3, a0
; RV32I-NEXT: srai a1, a1, 16
; RV32I-NEXT: ret
;
; RV32ZBT-LABEL: sdiv64_pow2_65536:
; RV32ZBT: # %bb.0: # %entry
; RV32ZBT-NEXT: srai a2, a1, 31
; RV32ZBT-NEXT: srli a2, a2, 16
; RV32ZBT-NEXT: add a2, a0, a2
; RV32ZBT-NEXT: sltu a0, a2, a0
; RV32ZBT-NEXT: add a1, a1, a0
; RV32ZBT-NEXT: fsri a0, a2, a1, 16
; RV32ZBT-NEXT: srai a1, a1, 16
; RV32ZBT-NEXT: ret
;
; RV64I-LABEL: sdiv64_pow2_65536:
; RV64I: # %bb.0: # %entry
; RV64I-NEXT: srai a1, a0, 63
; RV64I-NEXT: srli a1, a1, 48
; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: srai a0, a0, 16
; RV64I-NEXT: ret
;
; RV64ZBT-LABEL: sdiv64_pow2_65536:
; RV64ZBT: # %bb.0: # %entry
; RV64ZBT-NEXT: srai a1, a0, 63
; RV64ZBT-NEXT: srli a1, a1, 48
; RV64ZBT-NEXT: add a0, a0, a1
; RV64ZBT-NEXT: srai a0, a0, 16
; RV64ZBT-NEXT: ret
entry:
%div = sdiv i64 %a, 65536
ret i64 %div
}
; i64 sdiv by -65536. No configuration expects a cmov; the divide-by-65536
; sequence is followed by a negation.
define i64 @sdiv64_pow2_negative_65536(i64 %a) {
; RV32I-LABEL: sdiv64_pow2_negative_65536:
; RV32I: # %bb.0: # %entry
; RV32I-NEXT: srai a2, a1, 31
; RV32I-NEXT: srli a2, a2, 16
; RV32I-NEXT: add a2, a0, a2
; RV32I-NEXT: srli a3, a2, 16
; RV32I-NEXT: sltu a0, a2, a0
; RV32I-NEXT: add a1, a1, a0
; RV32I-NEXT: slli a0, a1, 16
; RV32I-NEXT: or a2, a3, a0
; RV32I-NEXT: neg a0, a2
; RV32I-NEXT: snez a2, a2
; RV32I-NEXT: srai a1, a1, 16
; RV32I-NEXT: add a1, a1, a2
; RV32I-NEXT: neg a1, a1
; RV32I-NEXT: ret
;
; RV32ZBT-LABEL: sdiv64_pow2_negative_65536:
; RV32ZBT: # %bb.0: # %entry
; RV32ZBT-NEXT: srai a2, a1, 31
; RV32ZBT-NEXT: srli a2, a2, 16
; RV32ZBT-NEXT: add a2, a0, a2
; RV32ZBT-NEXT: sltu a0, a2, a0
; RV32ZBT-NEXT: add a1, a1, a0
; RV32ZBT-NEXT: fsri a2, a2, a1, 16
; RV32ZBT-NEXT: neg a0, a2
; RV32ZBT-NEXT: snez a2, a2
; RV32ZBT-NEXT: srai a1, a1, 16
; RV32ZBT-NEXT: add a1, a1, a2
; RV32ZBT-NEXT: neg a1, a1
; RV32ZBT-NEXT: ret
;
; RV64I-LABEL: sdiv64_pow2_negative_65536:
; RV64I: # %bb.0: # %entry
; RV64I-NEXT: srai a1, a0, 63
; RV64I-NEXT: srli a1, a1, 48
; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: srai a0, a0, 16
; RV64I-NEXT: neg a0, a0
; RV64I-NEXT: ret
;
; RV64ZBT-LABEL: sdiv64_pow2_negative_65536:
; RV64ZBT: # %bb.0: # %entry
; RV64ZBT-NEXT: srai a1, a0, 63
; RV64ZBT-NEXT: srli a1, a1, 48
; RV64ZBT-NEXT: add a0, a0, a1
; RV64ZBT-NEXT: srai a0, a0, 16
; RV64ZBT-NEXT: neg a0, a0
; RV64ZBT-NEXT: ret
entry:
%div = sdiv i64 %a, -65536
ret i64 %div
}
; i64 sdiv by 2^33 (shift amount 33, larger than the 32-bit register width).
; No configuration expects a cmov; on riscv64 the output is the same with
; and without Zbt.
define i64 @sdiv64_pow2_8589934592(i64 %a) {
; RV32I-LABEL: sdiv64_pow2_8589934592:
; RV32I: # %bb.0: # %entry
; RV32I-NEXT: srli a2, a1, 31
; RV32I-NEXT: add a2, a1, a2
; RV32I-NEXT: srai a1, a1, 31
; RV32I-NEXT: add a1, a0, a1
; RV32I-NEXT: sltu a0, a1, a0
; RV32I-NEXT: add a1, a2, a0
; RV32I-NEXT: srai a0, a1, 1
; RV32I-NEXT: srai a1, a1, 31
; RV32I-NEXT: ret
;
; RV32ZBT-LABEL: sdiv64_pow2_8589934592:
; RV32ZBT: # %bb.0: # %entry
; RV32ZBT-NEXT: srli a2, a1, 31
; RV32ZBT-NEXT: add a2, a1, a2
; RV32ZBT-NEXT: srai a1, a1, 31
; RV32ZBT-NEXT: fsri a1, a1, a1, 31
; RV32ZBT-NEXT: add a1, a0, a1
; RV32ZBT-NEXT: sltu a0, a1, a0
; RV32ZBT-NEXT: add a1, a2, a0
; RV32ZBT-NEXT: srai a0, a1, 1
; RV32ZBT-NEXT: srai a1, a1, 31
; RV32ZBT-NEXT: ret
;
; RV64I-LABEL: sdiv64_pow2_8589934592:
; RV64I: # %bb.0: # %entry
; RV64I-NEXT: srai a1, a0, 63
; RV64I-NEXT: srli a1, a1, 31
; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: srai a0, a0, 33
; RV64I-NEXT: ret
;
; RV64ZBT-LABEL: sdiv64_pow2_8589934592:
; RV64ZBT: # %bb.0: # %entry
; RV64ZBT-NEXT: srai a1, a0, 63
; RV64ZBT-NEXT: srli a1, a1, 31
; RV64ZBT-NEXT: add a0, a0, a1
; RV64ZBT-NEXT: srai a0, a0, 33
; RV64ZBT-NEXT: ret
entry:
%div = sdiv i64 %a, 8589934592 ; 2^33
ret i64 %div
}
; i64 sdiv by -2^33. No configuration expects a cmov; the divide-by-2^33
; sequence is followed by a negation.
define i64 @sdiv64_pow2_negative_8589934592(i64 %a) {
; RV32I-LABEL: sdiv64_pow2_negative_8589934592:
; RV32I: # %bb.0: # %entry
; RV32I-NEXT: srli a2, a1, 31
; RV32I-NEXT: add a2, a1, a2
; RV32I-NEXT: srai a1, a1, 31
; RV32I-NEXT: add a1, a0, a1
; RV32I-NEXT: sltu a0, a1, a0
; RV32I-NEXT: add a0, a2, a0
; RV32I-NEXT: srai a1, a0, 31
; RV32I-NEXT: srai a0, a0, 1
; RV32I-NEXT: snez a2, a0
; RV32I-NEXT: add a1, a1, a2
; RV32I-NEXT: neg a1, a1
; RV32I-NEXT: neg a0, a0
; RV32I-NEXT: ret
;
; RV32ZBT-LABEL: sdiv64_pow2_negative_8589934592:
; RV32ZBT: # %bb.0: # %entry
; RV32ZBT-NEXT: srli a2, a1, 31
; RV32ZBT-NEXT: add a2, a1, a2
; RV32ZBT-NEXT: srai a1, a1, 31
; RV32ZBT-NEXT: fsri a1, a1, a1, 31
; RV32ZBT-NEXT: add a1, a0, a1
; RV32ZBT-NEXT: sltu a0, a1, a0
; RV32ZBT-NEXT: add a0, a2, a0
; RV32ZBT-NEXT: srai a1, a0, 31
; RV32ZBT-NEXT: srai a0, a0, 1
; RV32ZBT-NEXT: snez a2, a0
; RV32ZBT-NEXT: add a1, a1, a2
; RV32ZBT-NEXT: neg a1, a1
; RV32ZBT-NEXT: neg a0, a0
; RV32ZBT-NEXT: ret
;
; RV64I-LABEL: sdiv64_pow2_negative_8589934592:
; RV64I: # %bb.0: # %entry
; RV64I-NEXT: srai a1, a0, 63
; RV64I-NEXT: srli a1, a1, 31
; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: srai a0, a0, 33
; RV64I-NEXT: neg a0, a0
; RV64I-NEXT: ret
;
; RV64ZBT-LABEL: sdiv64_pow2_negative_8589934592:
; RV64ZBT: # %bb.0: # %entry
; RV64ZBT-NEXT: srai a1, a0, 63
; RV64ZBT-NEXT: srli a1, a1, 31
; RV64ZBT-NEXT: add a0, a0, a1
; RV64ZBT-NEXT: srai a0, a0, 33
; RV64ZBT-NEXT: neg a0, a0
; RV64ZBT-NEXT: ret
entry:
%div = sdiv i64 %a, -8589934592 ; -2^33
ret i64 %div
}