forked from OSchip/llvm-project
[GlobalISel] Add sdiv exact (X, constant) -> mul combine.
This is a port of the SDAG optimization; it covers only the exact sdiv case. Differential Revision: https://reviews.llvm.org/D130517
This commit is contained in:
parent
ebd0249fcf
commit
4cf3db41da
|
@ -647,6 +647,13 @@ public:
|
|||
bool matchUDivByConst(MachineInstr &MI);
|
||||
void applyUDivByConst(MachineInstr &MI);
|
||||
|
||||
/// Given an G_SDIV \p MI expressing a signed divide by constant, return an
|
||||
/// expression that implements it by multiplying by a magic number.
|
||||
/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
|
||||
MachineInstr *buildSDivUsingMul(MachineInstr &MI);
|
||||
bool matchSDivByConst(MachineInstr &MI);
|
||||
void applySDivByConst(MachineInstr &MI);
|
||||
|
||||
// G_UMULH x, (1 << c)) -> x >> (bitwidth - c)
|
||||
bool matchUMulHToLShr(MachineInstr &MI);
|
||||
void applyUMulHToLShr(MachineInstr &MI);
|
||||
|
|
|
@ -764,7 +764,13 @@ def udiv_by_const : GICombineRule<
|
|||
[{ return Helper.matchUDivByConst(*${root}); }]),
|
||||
(apply [{ Helper.applyUDivByConst(*${root}); }])>;
|
||||
|
||||
// Combine a signed divide by constant into a cheaper multiply sequence.
// Matches G_SDIV via CombinerHelper::matchSDivByConst and rewrites it in
// CombinerHelper::applySDivByConst.
def sdiv_by_const : GICombineRule<
  (defs root:$root),
  (match (wip_match_opcode G_SDIV):$root,
   [{ return Helper.matchSDivByConst(*${root}); }]),
  (apply [{ Helper.applySDivByConst(*${root}); }])>;

// Group of all integer-divide-by-constant combines. Note: the pre-patch
// group definition listing only udiv_by_const must not also be present;
// TableGen rejects duplicate record definitions.
def intdiv_combines : GICombineGroup<[udiv_by_const, sdiv_by_const]>;
|
||||
|
||||
def reassoc_ptradd : GICombineRule<
|
||||
(defs root:$root, build_fn_matchinfo:$matchinfo),
|
||||
|
|
|
@ -4935,6 +4935,108 @@ void CombinerHelper::applyUDivByConst(MachineInstr &MI) {
|
|||
replaceSingleDefInstWithReg(MI, NewMI->getOperand(0).getReg());
|
||||
}
|
||||
|
||||
// Returns true when the G_SDIV \p MI divides by a constant and it is
// profitable to rewrite it as a multiply sequence. Currently only the
// 'exact' sdiv case is supported.
bool CombinerHelper::matchSDivByConst(MachineInstr &MI) {
  assert(MI.getOpcode() == TargetOpcode::G_SDIV && "Expected SDIV");
  Register Dst = MI.getOperand(0).getReg();
  Register RHS = MI.getOperand(2).getReg();
  LLT DstTy = MRI.getType(Dst);

  // If the target says a real divide is cheap for this type, keep the sdiv.
  auto &MF = *MI.getMF();
  AttributeList Attr = MF.getFunction().getAttributes();
  const auto &TLI = getTargetLowering();
  LLVMContext &Ctx = MF.getFunction().getContext();
  auto &DL = MF.getDataLayout();
  if (TLI.isIntDivCheap(getApproximateEVTForLLT(DstTy, DL, Ctx), Attr))
    return false;

  // Don't do this for minsize because the instruction sequence is usually
  // larger.
  if (MF.getFunction().hasMinSize())
    return false;

  // If the sdiv has an 'exact' flag we can use a simpler lowering.
  // Require every (splat/vector) element of RHS to be a non-zero constant;
  // division by zero would make the inverse computation meaningless.
  if (MI.getFlag(MachineInstr::MIFlag::IsExact)) {
    return matchUnaryPredicate(
        MRI, RHS, [](const Constant *C) { return C && !C->isZeroValue(); });
  }

  // Don't support the general case for now.
  return false;
}
|
||||
|
||||
// Replace the matched G_SDIV with the equivalent multiply-based sequence
// built by buildSDivUsingMul, forwarding its result register.
void CombinerHelper::applySDivByConst(MachineInstr &MI) {
  MachineInstr *Replacement = buildSDivUsingMul(MI);
  Register ResultReg = Replacement->getOperand(0).getReg();
  replaceSingleDefInstWithReg(MI, ResultReg);
}
|
||||
|
||||
// Build the replacement for an exact G_SDIV by constant:
//   x sdiv (2^s * d)  ==  (x ashr-exact s) * d^-1  (mod 2^W, d odd)
// The power-of-two part becomes an exact arithmetic shift right, and the
// remaining odd divisor becomes a multiply by its multiplicative inverse
// modulo 2^W. Returns the final G_MUL instruction.
// NOTE(review): assumes matchSDivByConst already verified the 'exact' flag
// and non-zero constant divisors — this routine does not re-check them.
MachineInstr *CombinerHelper::buildSDivUsingMul(MachineInstr &MI) {
  assert(MI.getOpcode() == TargetOpcode::G_SDIV && "Expected SDIV");
  auto &SDiv = cast<GenericMachineInstr>(MI);
  Register Dst = SDiv.getReg(0);
  Register LHS = SDiv.getReg(1);
  Register RHS = SDiv.getReg(2);
  LLT Ty = MRI.getType(Dst);
  LLT ScalarTy = Ty.getScalarType();
  // Shift amounts may use a different (target-preferred) type than the value.
  LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
  LLT ScalarShiftAmtTy = ShiftAmtTy.getScalarType();
  auto &MIB = Builder;
  MIB.setInstrAndDebugLoc(MI);

  // Per-element shift amounts and multiply factors; for scalars there is
  // exactly one entry of each.
  bool UseSRA = false;
  SmallVector<Register, 16> Shifts, Factors;

  auto *RHSDef = cast<GenericMachineInstr>(getDefIgnoringCopies(RHS, MRI));
  bool IsSplat = getIConstantSplatVal(*RHSDef, MRI).hasValue();

  auto BuildSDIVPattern = [&](const Constant *C) {
    // Don't recompute inverses for each splat element.
    if (IsSplat && !Factors.empty()) {
      Shifts.push_back(Shifts[0]);
      Factors.push_back(Factors[0]);
      return true;
    }

    auto *CI = cast<ConstantInt>(C);
    APInt Divisor = CI->getValue();
    // Split off the power-of-two part of the divisor as an exact ashr.
    unsigned Shift = Divisor.countTrailingZeros();
    if (Shift) {
      Divisor.ashrInPlace(Shift);
      UseSRA = true;
    }

    // Calculate the multiplicative inverse modulo BW.
    // 2^W requires W + 1 bits, so we have to extend and then truncate.
    unsigned W = Divisor.getBitWidth();
    APInt Factor = Divisor.zext(W + 1)
                       .multiplicativeInverse(APInt::getSignedMinValue(W + 1))
                       .trunc(W);
    Shifts.push_back(MIB.buildConstant(ScalarShiftAmtTy, Shift).getReg(0));
    Factors.push_back(MIB.buildConstant(ScalarTy, Factor).getReg(0));
    return true;
  };

  // Collect all magic values from the build vector.
  bool Matched = matchUnaryPredicate(MRI, RHS, BuildSDIVPattern);
  (void)Matched;
  assert(Matched && "Expected unary predicate match to succeed");

  // Re-assemble per-element constants into vectors when needed.
  Register Shift, Factor;
  if (Ty.isVector()) {
    Shift = MIB.buildBuildVector(ShiftAmtTy, Shifts).getReg(0);
    Factor = MIB.buildBuildVector(Ty, Factors).getReg(0);
  } else {
    Shift = Shifts[0];
    Factor = Factors[0];
  }

  Register Res = LHS;

  // Only emit the shift if some divisor actually had a power-of-two part;
  // the shift is 'exact' because the original sdiv was exact.
  if (UseSRA)
    Res = MIB.buildAShr(Ty, Res, Shift, MachineInstr::IsExact).getReg(0);

  return MIB.buildMul(Ty, Res, Factor);
}
|
||||
|
||||
bool CombinerHelper::matchUMulHToLShr(MachineInstr &MI) {
|
||||
assert(MI.getOpcode() == TargetOpcode::G_UMULH);
|
||||
Register RHS = MI.getOperand(2).getReg();
|
||||
|
|
|
@ -1077,7 +1077,7 @@ Optional<APInt> llvm::getIConstantSplatVal(const Register Reg,
|
|||
return None;
|
||||
}
|
||||
|
||||
/// MachineInstr overload: return the constant splat value of \p MI's def
/// register, if any, by delegating to the Register-based overload.
/// Note: the definition must be qualified with llvm:: to match the
/// declaration in the llvm namespace; an unqualified duplicate signature
/// line (pre-patch diff residue) would not compile.
Optional<APInt> llvm::getIConstantSplatVal(const MachineInstr &MI,
                                           const MachineRegisterInfo &MRI) {
  return getIConstantSplatVal(MI.getOperand(0).getReg(), MRI);
}
|
||||
|
|
|
@ -0,0 +1,133 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -o - -mtriple=aarch64-unknown-unknown -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs %s | FileCheck %s
# Tests for the GlobalISel exact-sdiv-by-constant -> ashr+mul combine.
--- |
  target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"

  define void @sdiv_exact() { ret void }
  define void @sdiv_noexact() { ret void }
  define void @sdiv_exact_minsize() #0 { ret void }
  define void @div_v4s32() { ret void }
  define void @div_v4s32_splat() { ret void }

  attributes #0 = { minsize }

...
---
# exact sdiv by 104 (= 8 * 13): combined into an exact ashr by 3 followed by
# a multiply by 13's inverse mod 2^32 (-991146299).
name: sdiv_exact
body: |
  bb.1:
    liveins: $w0

    ; CHECK-LABEL: name: sdiv_exact
    ; CHECK: liveins: $w0
    ; CHECK-NEXT: {{ $}}
    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -991146299
    ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s32) = exact G_ASHR [[COPY]], [[C]](s32)
    ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[ASHR]], [[C1]]
    ; CHECK-NEXT: $w0 = COPY [[MUL]](s32)
    ; CHECK-NEXT: RET_ReallyLR implicit $w0
    %0:_(s32) = COPY $w0
    %1:_(s32) = G_CONSTANT i32 104
    %2:_(s32) = exact G_SDIV %0, %1
    $w0 = COPY %2(s32)
    RET_ReallyLR implicit $w0

...
---
# Without the 'exact' flag the combine must not fire: the G_SDIV survives.
name: sdiv_noexact
body: |
  bb.1:
    liveins: $w0

    ; CHECK-LABEL: name: sdiv_noexact
    ; CHECK: liveins: $w0
    ; CHECK-NEXT: {{ $}}
    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 104
    ; CHECK-NEXT: [[SDIV:%[0-9]+]]:_(s32) = G_SDIV [[COPY]], [[C]]
    ; CHECK-NEXT: $w0 = COPY [[SDIV]](s32)
    ; CHECK-NEXT: RET_ReallyLR implicit $w0
    %0:_(s32) = COPY $w0
    %1:_(s32) = G_CONSTANT i32 104
    %2:_(s32) = G_SDIV %0, %1
    $w0 = COPY %2(s32)
    RET_ReallyLR implicit $w0

...
---
# Under minsize (attribute #0) the combine is suppressed even for exact sdiv,
# since the expansion is usually larger.
name: sdiv_exact_minsize
body: |
  bb.1:
    liveins: $w0

    ; CHECK-LABEL: name: sdiv_exact_minsize
    ; CHECK: liveins: $w0
    ; CHECK-NEXT: {{ $}}
    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 104
    ; CHECK-NEXT: [[SDIV:%[0-9]+]]:_(s32) = exact G_SDIV [[COPY]], [[C]]
    ; CHECK-NEXT: $w0 = COPY [[SDIV]](s32)
    ; CHECK-NEXT: RET_ReallyLR implicit $w0
    %0:_(s32) = COPY $w0
    %1:_(s32) = G_CONSTANT i32 104
    %2:_(s32) = exact G_SDIV %0, %1
    $w0 = COPY %2(s32)
    RET_ReallyLR implicit $w0

...
---
# Vector case with two distinct divisors (104 = 8*13 and 72 = 8*9): one shared
# shift of 3, but per-lane inverse factors (-991146299 for 13, 954437177 for 9).
name: div_v4s32
body: |
  bb.1:
    liveins: $q0

    ; CHECK-LABEL: name: div_v4s32
    ; CHECK: liveins: $q0
    ; CHECK-NEXT: {{ $}}
    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -991146299
    ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 954437177
    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32)
    ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C1]](s32), [[C2]](s32), [[C1]](s32), [[C2]](s32)
    ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(<4 x s32>) = exact G_ASHR [[COPY]], [[BUILD_VECTOR]](<4 x s32>)
    ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(<4 x s32>) = G_MUL [[ASHR]], [[BUILD_VECTOR1]]
    ; CHECK-NEXT: $q0 = COPY [[MUL]](<4 x s32>)
    ; CHECK-NEXT: RET_ReallyLR implicit $q0
    %0:_(<4 x s32>) = COPY $q0
    %c1:_(s32) = G_CONSTANT i32 104
    %c2:_(s32) = G_CONSTANT i32 72
    %1:_(<4 x s32>) = G_BUILD_VECTOR %c1(s32), %c2(s32), %c1(s32), %c2(s32)
    %3:_(<4 x s32>) = exact G_SDIV %0, %1
    $q0 = COPY %3(<4 x s32>)
    RET_ReallyLR implicit $q0

...
---
# Splat-divisor vector case: the inverse is computed once and reused for
# every lane (the IsSplat fast path in buildSDivUsingMul).
name: div_v4s32_splat
body: |
  bb.1:
    liveins: $q0

    ; CHECK-LABEL: name: div_v4s32_splat
    ; CHECK: liveins: $q0
    ; CHECK-NEXT: {{ $}}
    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -991146299
    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32)
    ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C1]](s32), [[C1]](s32), [[C1]](s32), [[C1]](s32)
    ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(<4 x s32>) = exact G_ASHR [[COPY]], [[BUILD_VECTOR]](<4 x s32>)
    ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(<4 x s32>) = G_MUL [[ASHR]], [[BUILD_VECTOR1]]
    ; CHECK-NEXT: $q0 = COPY [[MUL]](<4 x s32>)
    ; CHECK-NEXT: RET_ReallyLR implicit $q0
    %0:_(<4 x s32>) = COPY $q0
    %c1:_(s32) = G_CONSTANT i32 104
    %1:_(<4 x s32>) = G_BUILD_VECTOR %c1(s32), %c1(s32), %c1(s32), %c1(s32)
    %3:_(<4 x s32>) = exact G_SDIV %0, %1
    $q0 = COPY %3(<4 x s32>)
    RET_ReallyLR implicit $q0

...
|
Loading…
Reference in New Issue