forked from OSchip/llvm-project
[BypassSlowDivision] Do not bypass division of hash-like values
Disable bypassing if one of the operands looks like a hash value. Slow division often occurs in hashtable implementations and fast division is never taken there because a hash value is extremely unlikely to have enough upper bits set to zero. A value is considered to be hash-like if it is produced by 1) XOR operation 2) Multiplication by a constant wider than the shorter type 3) PHI node with all incoming values being hash-like Differential Revision: https://reviews.llvm.org/D28200 llvm-svn: 299329
This commit is contained in:
parent
dddce31eb4
commit
fca527af5c
|
@ -17,6 +17,7 @@
|
|||
|
||||
#include "llvm/Transforms/Utils/BypassSlowDivision.h"
|
||||
#include "llvm/ADT/DenseMap.h"
|
||||
#include "llvm/ADT/SmallPtrSet.h"
|
||||
#include "llvm/Analysis/ValueTracking.h"
|
||||
#include "llvm/IR/Function.h"
|
||||
#include "llvm/IR/IRBuilder.h"
|
||||
|
@ -81,17 +82,18 @@ namespace llvm {
|
|||
|
||||
typedef DenseMap<DivOpInfo, QuotRemPair> DivCacheTy;
|
||||
typedef DenseMap<unsigned, unsigned> BypassWidthsTy;
|
||||
typedef SmallPtrSet<Instruction *, 4> VisitedSetTy;
|
||||
}
|
||||
|
||||
namespace {
|
||||
enum ValueRange {
|
||||
/// Operand definitely fits into BypassType. No runtime checks are needed.
|
||||
VALRNG_SHORT,
|
||||
VALRNG_KNOWN_SHORT,
|
||||
/// A runtime check is required, as value range is unknown.
|
||||
VALRNG_UNKNOWN,
|
||||
/// Operand is unlikely to fit into BypassType. The bypassing should be
|
||||
/// disabled.
|
||||
VALRNG_LONG
|
||||
VALRNG_LIKELY_LONG
|
||||
};
|
||||
|
||||
class FastDivInsertionTask {
|
||||
|
@ -100,7 +102,8 @@ class FastDivInsertionTask {
|
|||
IntegerType *BypassType = nullptr;
|
||||
BasicBlock *MainBB = nullptr;
|
||||
|
||||
ValueRange getValueRange(Value *Op);
|
||||
bool isHashLikeValue(Value *V, VisitedSetTy &Visited);
|
||||
ValueRange getValueRange(Value *Op, VisitedSetTy &Visited);
|
||||
QuotRemWithBB createSlowBB(BasicBlock *Successor);
|
||||
QuotRemWithBB createFastBB(BasicBlock *Successor);
|
||||
QuotRemPair createDivRemPhiNodes(QuotRemWithBB &LHS, QuotRemWithBB &RHS,
|
||||
|
@ -187,8 +190,65 @@ Value *FastDivInsertionTask::getReplacement(DivCacheTy &Cache) {
|
|||
return isDivisionOp() ? Value.Quotient : Value.Remainder;
|
||||
}
|
||||
|
||||
/// \brief Check if a value looks like a hash.
|
||||
///
|
||||
/// The routine is expected to detect values computed using the most common hash
|
||||
/// algorithms. Typically, hash computations end with one of the following
|
||||
/// instructions:
|
||||
///
|
||||
/// 1) MUL with a constant wider than BypassType
|
||||
/// 2) XOR instruction
|
||||
///
|
||||
/// And even if we are wrong and the value is not a hash, it is still quite
|
||||
/// unlikely that such values will fit into BypassType.
|
||||
///
|
||||
/// To detect string hash algorithms like FNV we have to look through PHI-nodes.
|
||||
/// It is implemented as a depth-first search for values that look neither long
|
||||
/// nor hash-like.
|
||||
bool FastDivInsertionTask::isHashLikeValue(Value *V, VisitedSetTy &Visited) {
|
||||
Instruction *I = dyn_cast<Instruction>(V);
|
||||
if (!I)
|
||||
return false;
|
||||
|
||||
switch (I->getOpcode()) {
|
||||
case Instruction::Xor:
|
||||
return true;
|
||||
case Instruction::Mul: {
|
||||
// After Constant Hoisting pass, long constants may be represented as
|
||||
// bitcast instructions. As a result, some constants may look like an
|
||||
// instruction at first, and an additional check is necessary to find out if
|
||||
// an operand is actually a constant.
|
||||
Value *Op1 = I->getOperand(1);
|
||||
ConstantInt *C = dyn_cast<ConstantInt>(Op1);
|
||||
if (!C && isa<BitCastInst>(Op1))
|
||||
C = dyn_cast<ConstantInt>(cast<BitCastInst>(Op1)->getOperand(0));
|
||||
return C && C->getValue().getMinSignedBits() > BypassType->getBitWidth();
|
||||
}
|
||||
case Instruction::PHI: {
|
||||
// Stop IR traversal in case of a crazy input code. This limits recursion
|
||||
// depth.
|
||||
if (Visited.size() >= 16)
|
||||
return false;
|
||||
// Do not visit nodes that have been visited already. We return true because
|
||||
// it means that we couldn't find any value that doesn't look hash-like.
|
||||
if (Visited.find(I) != Visited.end())
|
||||
return true;
|
||||
Visited.insert(I);
|
||||
return llvm::all_of(cast<PHINode>(I)->incoming_values(), [&](Value *V) {
|
||||
// Ignore undef values as they probably don't affect the division
|
||||
// operands.
|
||||
return getValueRange(V, Visited) == VALRNG_LIKELY_LONG ||
|
||||
isa<UndefValue>(V);
|
||||
});
|
||||
}
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/// Check if an integer value fits into our bypass type.
|
||||
ValueRange FastDivInsertionTask::getValueRange(Value *V) {
|
||||
ValueRange FastDivInsertionTask::getValueRange(Value *V,
|
||||
VisitedSetTy &Visited) {
|
||||
unsigned ShortLen = BypassType->getBitWidth();
|
||||
unsigned LongLen = V->getType()->getIntegerBitWidth();
|
||||
|
||||
|
@ -201,10 +261,17 @@ ValueRange FastDivInsertionTask::getValueRange(Value *V) {
|
|||
computeKnownBits(V, Zeros, Ones, DL);
|
||||
|
||||
if (Zeros.countLeadingOnes() >= HiBits)
|
||||
return VALRNG_SHORT;
|
||||
return VALRNG_KNOWN_SHORT;
|
||||
|
||||
if (Ones.countLeadingZeros() < HiBits)
|
||||
return VALRNG_LONG;
|
||||
return VALRNG_LIKELY_LONG;
|
||||
|
||||
// Long integer divisions are often used in hashtable implementations. It's
|
||||
// not worth bypassing such divisions because hash values are extremely
|
||||
// unlikely to have enough leading zeros. The call below tries to detect
|
||||
// values that are unlikely to fit BypassType (including hashes).
|
||||
if (isHashLikeValue(V, Visited))
|
||||
return VALRNG_LIKELY_LONG;
|
||||
|
||||
return VALRNG_UNKNOWN;
|
||||
}
|
||||
|
@ -308,16 +375,18 @@ Optional<QuotRemPair> FastDivInsertionTask::insertFastDivAndRem() {
|
|||
return None;
|
||||
}
|
||||
|
||||
ValueRange DividendRange = getValueRange(Dividend);
|
||||
if (DividendRange == VALRNG_LONG)
|
||||
VisitedSetTy SetL;
|
||||
ValueRange DividendRange = getValueRange(Dividend, SetL);
|
||||
if (DividendRange == VALRNG_LIKELY_LONG)
|
||||
return None;
|
||||
|
||||
ValueRange DivisorRange = getValueRange(Divisor);
|
||||
if (DivisorRange == VALRNG_LONG)
|
||||
VisitedSetTy SetR;
|
||||
ValueRange DivisorRange = getValueRange(Divisor, SetR);
|
||||
if (DivisorRange == VALRNG_LIKELY_LONG)
|
||||
return None;
|
||||
|
||||
bool DividendShort = (DividendRange == VALRNG_SHORT);
|
||||
bool DivisorShort = (DivisorRange == VALRNG_SHORT);
|
||||
bool DividendShort = (DividendRange == VALRNG_KNOWN_SHORT);
|
||||
bool DivisorShort = (DivisorRange == VALRNG_KNOWN_SHORT);
|
||||
|
||||
if (DividendShort && DivisorShort) {
|
||||
// If both operands are known to be short then just replace the long
|
||||
|
|
|
@ -93,3 +93,124 @@ define void @Test_special_case(i32 %a, i64 %b, i64* %retptr) {
|
|||
store i64 %res, i64* %retptr
|
||||
ret void
|
||||
}
|
||||
|
||||
|
||||
; Do not bypass a division if one of the operands looks like a hash value.
|
||||
define void @Test_dont_bypass_xor(i64 %a, i64 %b, i64 %l, i64* %retptr) {
|
||||
; CHECK-LABEL: @Test_dont_bypass_xor(
|
||||
; CHECK-NEXT: [[C:%.*]] = xor i64 [[A:%.*]], [[B:%.*]]
|
||||
; CHECK-NEXT: [[RES:%.*]] = udiv i64 [[C]], [[L:%.*]]
|
||||
; CHECK-NEXT: store i64 [[RES]], i64* [[RETPTR:%.*]]
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
%c = xor i64 %a, %b
|
||||
%res = udiv i64 %c, %l
|
||||
store i64 %res, i64* %retptr
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @Test_dont_bypass_phi_xor(i64 %a, i64 %b, i64 %l, i64* %retptr) {
|
||||
; CHECK-LABEL: @Test_dont_bypass_phi_xor(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[B:%.*]], 0
|
||||
; CHECK-NEXT: br i1 [[CMP]], label [[MERGE:%.*]], label [[XORPATH:%.*]]
|
||||
; CHECK: xorpath:
|
||||
; CHECK-NEXT: [[C:%.*]] = xor i64 [[A:%.*]], [[B]]
|
||||
; CHECK-NEXT: br label [[MERGE]]
|
||||
; CHECK: merge:
|
||||
; CHECK-NEXT: [[E:%.*]] = phi i64 [ undef, [[ENTRY:%.*]] ], [ [[C]], [[XORPATH]] ]
|
||||
; CHECK-NEXT: [[RES:%.*]] = sdiv i64 [[E]], [[L:%.*]]
|
||||
; CHECK-NEXT: store i64 [[RES]], i64* [[RETPTR:%.*]]
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
entry:
|
||||
%cmp = icmp eq i64 %b, 0
|
||||
br i1 %cmp, label %merge, label %xorpath
|
||||
|
||||
xorpath:
|
||||
%c = xor i64 %a, %b
|
||||
br label %merge
|
||||
|
||||
merge:
|
||||
%e = phi i64 [ undef, %entry ], [ %c, %xorpath ]
|
||||
%res = sdiv i64 %e, %l
|
||||
store i64 %res, i64* %retptr
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @Test_dont_bypass_mul_long_const(i64 %a, i64 %l, i64* %retptr) {
|
||||
; CHECK-LABEL: @Test_dont_bypass_mul_long_const(
|
||||
; CHECK-NEXT: [[C:%.*]] = mul i64 [[A:%.*]], 5229553307
|
||||
; CHECK-NEXT: [[RES:%.*]] = urem i64 [[C]], [[L:%.*]]
|
||||
; CHECK-NEXT: store i64 [[RES]], i64* [[RETPTR:%.*]]
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
%c = mul i64 %a, 5229553307 ; the constant doesn't fit 32 bits
|
||||
%res = urem i64 %c, %l
|
||||
store i64 %res, i64* %retptr
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @Test_bypass_phi_mul_const(i64 %a, i64 %b, i64* %retptr) {
|
||||
; CHECK-LABEL: @Test_bypass_phi_mul_const(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[A_MUL:%.*]] = mul nsw i64 [[A:%.*]], 34806414968801
|
||||
; CHECK-NEXT: [[P:%.*]] = icmp sgt i64 [[A]], [[B:%.*]]
|
||||
; CHECK-NEXT: br i1 [[P]], label [[BRANCH:%.*]], label [[MERGE:%.*]]
|
||||
; CHECK: branch:
|
||||
; CHECK-NEXT: br label [[MERGE]]
|
||||
; CHECK: merge:
|
||||
; CHECK-NEXT: [[LHS:%.*]] = phi i64 [ 42, [[BRANCH]] ], [ [[A_MUL]], [[ENTRY:%.*]] ]
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = or i64 [[LHS]], [[B]]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], -4294967296
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 0
|
||||
; CHECK-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP8:%.*]]
|
||||
; CHECK: [[TMP4:%.*]] = trunc i64 [[B]] to i32
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = trunc i64 [[LHS]] to i32
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = udiv i32 [[TMP5]], [[TMP4]]
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
|
||||
; CHECK-NEXT: br label [[TMP10:%.*]]
|
||||
; CHECK: [[TMP9:%.*]] = sdiv i64 [[LHS]], [[B]]
|
||||
; CHECK-NEXT: br label [[TMP10]]
|
||||
; CHECK: [[TMP11:%.*]] = phi i64 [ [[TMP7]], [[TMP3]] ], [ [[TMP9]], [[TMP8]] ]
|
||||
; CHECK-NEXT: store i64 [[TMP11]], i64* [[RETPTR:%.*]]
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
entry:
|
||||
%a.mul = mul nsw i64 %a, 34806414968801
|
||||
%p = icmp sgt i64 %a, %b
|
||||
br i1 %p, label %branch, label %merge
|
||||
|
||||
branch:
|
||||
br label %merge
|
||||
|
||||
merge:
|
||||
%lhs = phi i64 [ 42, %branch ], [ %a.mul, %entry ]
|
||||
%res = sdiv i64 %lhs, %b
|
||||
store i64 %res, i64* %retptr
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @Test_bypass_mul_short_const(i64 %a, i64 %l, i64* %retptr) {
|
||||
; CHECK-LABEL: @Test_bypass_mul_short_const(
|
||||
; CHECK-NEXT: [[C:%.*]] = mul i64 [[A:%.*]], -42
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = or i64 [[C]], [[L:%.*]]
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = and i64 [[TMP1]], -4294967296
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[TMP2]], 0
|
||||
; CHECK-NEXT: br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP9:%.*]]
|
||||
; CHECK: [[TMP5:%.*]] = trunc i64 [[L]] to i32
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = trunc i64 [[C]] to i32
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = urem i32 [[TMP6]], [[TMP5]]
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
|
||||
; CHECK-NEXT: br label [[TMP11:%.*]]
|
||||
; CHECK: [[TMP10:%.*]] = urem i64 [[C]], [[L]]
|
||||
; CHECK-NEXT: br label [[TMP11]]
|
||||
; CHECK: [[TMP12:%.*]] = phi i64 [ [[TMP8]], [[TMP4]] ], [ [[TMP10]], [[TMP9]] ]
|
||||
; CHECK-NEXT: store i64 [[TMP12]], i64* [[RETPTR:%.*]]
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
%c = mul i64 %a, -42
|
||||
%res = urem i64 %c, %l
|
||||
store i64 %res, i64* %retptr
|
||||
ret void
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue