[InstCombine] make icmp-mul fold more efficient

There are probably many more cases like this (see also the comments in D33338 about responsibility),
but I suspect they don't usually produce a visible manifestation.

Given the recent interest in improving InstCombine efficiency, another potential micro-optimization,
which could be applied in several places in this function, is to morph the existing icmp's
predicate/operands instead of creating a new instruction.

llvm-svn: 303860
This commit is contained in:
Sanjay Patel 2017-05-25 14:13:57 +00:00
parent 32d0d38679
commit 5150612012
2 changed files with 8 additions and 6 deletions

View File

@ -3057,19 +3057,21 @@ Instruction *InstCombiner::foldICmpBinOp(ICmpInst &I) {
break;
const APInt *C;
if (match(BO0->getOperand(1), m_APInt(C))) {
if (match(BO0->getOperand(1), m_APInt(C)) && *C != 0 && *C != 1) {
// icmp eq/ne (X * C), (Y * C) --> icmp (X & Mask), (Y & Mask)
// Mask = -1 >> count-trailing-zeros(C).
if (*C != 0 && *C != 1) {
// FIXME: If trailing zeros is 0, don't bother creating Mask.
if (unsigned TZs = C->countTrailingZeros()) {
Constant *Mask = ConstantInt::get(
BO0->getType(),
APInt::getLowBitsSet(C->getBitWidth(),
C->getBitWidth() - C->countTrailingZeros()));
APInt::getLowBitsSet(C->getBitWidth(), C->getBitWidth() - TZs));
Value *And1 = Builder->CreateAnd(BO0->getOperand(0), Mask);
Value *And2 = Builder->CreateAnd(BO1->getOperand(0), Mask);
return new ICmpInst(Pred, And1, And2);
}
// If there are no trailing zeros in the multiplier, just eliminate
// the multiplies (no masking is needed):
// icmp eq/ne (X * C), (Y * C) --> icmp eq/ne X, Y
return new ICmpInst(Pred, BO0->getOperand(0), BO1->getOperand(0));
}
break;
}

View File

@ -2920,7 +2920,7 @@ define i1 @eq_mul_constants(i32 %x, i32 %y) {
define <2 x i1> @eq_mul_constants_splat(<2 x i32> %x, <2 x i32> %y) {
; CHECK-LABEL: @eq_mul_constants_splat(
; CHECK-NEXT: [[C:%.*]] = icmp ne <2 x i32> %y, %x
; CHECK-NEXT: [[C:%.*]] = icmp ne <2 x i32> %x, %y
; CHECK-NEXT: ret <2 x i1> [[C]]
;
%A = mul <2 x i32> %x, <i32 5, i32 5>