Factor out the multiply analysis code in ComputeMaskedBits and apply it to the
overflow checking multiply intrinsic as well. Add a test for this, updating the
test from grep to FileCheck.

llvm-svn: 153028
parent 129f9ef669
commit fa30607eca
@@ -130,6 +130,71 @@ static void ComputeMaskedBitsAddSub(bool Add, Value *Op0, Value *Op1, bool NSW,
  }
}

static void ComputeMaskedBitsMul(Value *Op0, Value *Op1, bool NSW,
                                 const APInt &Mask,
                                 APInt &KnownZero, APInt &KnownOne,
                                 APInt &KnownZero2, APInt &KnownOne2,
                                 const TargetData *TD, unsigned Depth) {
  unsigned BitWidth = Mask.getBitWidth();
  APInt Mask2 = APInt::getAllOnesValue(BitWidth);
  ComputeMaskedBits(Op1, Mask2, KnownZero, KnownOne, TD, Depth+1);
  ComputeMaskedBits(Op0, Mask2, KnownZero2, KnownOne2, TD, Depth+1);
  assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
  assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");

  bool isKnownNegative = false;
  bool isKnownNonNegative = false;
  // If the multiplication is known not to overflow, compute the sign bit.
  if (Mask.isNegative() && NSW) {
    if (Op0 == Op1) {
      // The product of a number with itself is non-negative.
      isKnownNonNegative = true;
    } else {
      bool isKnownNonNegativeOp1 = KnownZero.isNegative();
      bool isKnownNonNegativeOp0 = KnownZero2.isNegative();
      bool isKnownNegativeOp1 = KnownOne.isNegative();
      bool isKnownNegativeOp0 = KnownOne2.isNegative();
      // The product of two numbers with the same sign is non-negative.
      isKnownNonNegative = (isKnownNegativeOp1 && isKnownNegativeOp0) ||
                           (isKnownNonNegativeOp1 && isKnownNonNegativeOp0);
      // The product of a negative number and a non-negative number is either
      // negative or zero.
      if (!isKnownNonNegative)
        isKnownNegative = (isKnownNegativeOp1 && isKnownNonNegativeOp0 &&
                           isKnownNonZero(Op0, TD, Depth)) ||
                          (isKnownNegativeOp0 && isKnownNonNegativeOp1 &&
                           isKnownNonZero(Op1, TD, Depth));
    }
  }

  // If low bits are zero in either operand, output low known-0 bits.
  // Also compute a conservative estimate for high known-0 bits.
  // More trickiness is possible, but this is sufficient for the
  // interesting case of alignment computation.
  KnownOne.clearAllBits();
  unsigned TrailZ = KnownZero.countTrailingOnes() +
                    KnownZero2.countTrailingOnes();
  unsigned LeadZ = std::max(KnownZero.countLeadingOnes() +
                            KnownZero2.countLeadingOnes(),
                            BitWidth) - BitWidth;

  TrailZ = std::min(TrailZ, BitWidth);
  LeadZ = std::min(LeadZ, BitWidth);
  KnownZero = APInt::getLowBitsSet(BitWidth, TrailZ) |
              APInt::getHighBitsSet(BitWidth, LeadZ);
  KnownZero &= Mask;

  // Only make use of no-wrap flags if we failed to compute the sign bit
  // directly. This matters if the multiplication always overflows, in
  // which case we prefer to follow the result of the direct computation,
  // though as the program is invoking undefined behaviour we can choose
  // whatever we like here.
  if (isKnownNonNegative && !KnownOne.isNegative())
    KnownZero.setBit(BitWidth - 1);
  else if (isKnownNegative && !KnownZero.isNegative())
    KnownOne.setBit(BitWidth - 1);
}

/// ComputeMaskedBits - Determine which of the bits specified in Mask are
/// known to be either zero or one and return them in the KnownZero/KnownOne
/// bit sets. This code only analyzes bits in Mask, in order to short-circuit
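The helper above combines the operands' known bits conservatively: trailing known-zero bits of the two operands add up, and leading known-zero bits combine as lead0(a) + lead0(b) - BitWidth. Below is a minimal standalone sketch of that arithmetic (plain C++; the KnownBits struct, knownBitsOfMul and the fixed 32-bit width are invented here for illustration and are not LLVM's APIs):

#include <algorithm>
#include <cstdint>
#include <cstdio>

// Bits proven zero / one for a 32-bit value.
struct KnownBits {
  uint32_t Zero;  // bit set => that bit of the value is known to be 0
  uint32_t One;   // bit set => that bit of the value is known to be 1
};

static unsigned countTrailingOnes(uint32_t v) {
  unsigned n = 0;
  while (v & 1) { ++n; v >>= 1; }
  return n;
}

static unsigned countLeadingOnes(uint32_t v) {
  unsigned n = 0;
  for (int b = 31; b >= 0 && ((v >> b) & 1); --b) ++n;
  return n;
}

// Conservative known-zero bits of a product: trailing zeros of the operands
// add, and the product has at least lead0(a) + lead0(b) - 32 leading zeros.
KnownBits knownBitsOfMul(KnownBits A, KnownBits B) {
  const unsigned BitWidth = 32;
  unsigned TrailZ = countTrailingOnes(A.Zero) + countTrailingOnes(B.Zero);
  unsigned LeadZ = std::max(countLeadingOnes(A.Zero) + countLeadingOnes(B.Zero),
                            BitWidth) - BitWidth;
  TrailZ = std::min(TrailZ, BitWidth);
  LeadZ = std::min(LeadZ, BitWidth);
  KnownBits R;
  R.One = 0;  // mirror KnownOne.clearAllBits(): claim nothing about one bits
  R.Zero = (TrailZ == 32 ? 0xffffffffu : (1u << TrailZ) - 1) |
           (LeadZ == 0 ? 0u : ~((1u << (BitWidth - LeadZ)) - 1));
  return R;
}

int main() {
  // a: a multiple of 8 below 256.  b: a multiple of 4 below 16.
  KnownBits A = {0xffffff07u, 0};
  KnownBits B = {0xfffffff3u, 0};
  KnownBits P = knownBitsOfMul(A, B);
  std::printf("0x%08x\n", (unsigned)P.Zero);  // 0xfffff01f: 5 low and 20 high bits known zero
}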
@@ -294,68 +359,11 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
    return;
  }
  case Instruction::Mul: {
    APInt Mask2 = APInt::getAllOnesValue(BitWidth);
    ComputeMaskedBits(I->getOperand(1), Mask2, KnownZero, KnownOne, TD,Depth+1);
    ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero2, KnownOne2, TD,
                      Depth+1);
    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");

    bool isKnownNegative = false;
    bool isKnownNonNegative = false;
    // If the multiplication is known not to overflow, compute the sign bit.
    if (Mask.isNegative() &&
        cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap()) {
      Value *Op1 = I->getOperand(1), *Op2 = I->getOperand(0);
      if (Op1 == Op2) {
        // The product of a number with itself is non-negative.
        isKnownNonNegative = true;
      } else {
        bool isKnownNonNegative1 = KnownZero.isNegative();
        bool isKnownNonNegative2 = KnownZero2.isNegative();
        bool isKnownNegative1 = KnownOne.isNegative();
        bool isKnownNegative2 = KnownOne2.isNegative();
        // The product of two numbers with the same sign is non-negative.
        isKnownNonNegative = (isKnownNegative1 && isKnownNegative2) ||
                             (isKnownNonNegative1 && isKnownNonNegative2);
        // The product of a negative number and a non-negative number is either
        // negative or zero.
        if (!isKnownNonNegative)
          isKnownNegative = (isKnownNegative1 && isKnownNonNegative2 &&
                             isKnownNonZero(Op2, TD, Depth)) ||
                            (isKnownNegative2 && isKnownNonNegative1 &&
                             isKnownNonZero(Op1, TD, Depth));
      }
    }

    // If low bits are zero in either operand, output low known-0 bits.
    // Also compute a conservative estimate for high known-0 bits.
    // More trickiness is possible, but this is sufficient for the
    // interesting case of alignment computation.
    KnownOne.clearAllBits();
    unsigned TrailZ = KnownZero.countTrailingOnes() +
                      KnownZero2.countTrailingOnes();
    unsigned LeadZ = std::max(KnownZero.countLeadingOnes() +
                              KnownZero2.countLeadingOnes(),
                              BitWidth) - BitWidth;

    TrailZ = std::min(TrailZ, BitWidth);
    LeadZ = std::min(LeadZ, BitWidth);
    KnownZero = APInt::getLowBitsSet(BitWidth, TrailZ) |
                APInt::getHighBitsSet(BitWidth, LeadZ);
    KnownZero &= Mask;

    // Only make use of no-wrap flags if we failed to compute the sign bit
    // directly. This matters if the multiplication always overflows, in
    // which case we prefer to follow the result of the direct computation,
    // though as the program is invoking undefined behaviour we can choose
    // whatever we like here.
    if (isKnownNonNegative && !KnownOne.isNegative())
      KnownZero.setBit(BitWidth - 1);
    else if (isKnownNegative && !KnownZero.isNegative())
      KnownOne.setBit(BitWidth - 1);

    return;
    bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
    ComputeMaskedBitsMul(I->getOperand(0), I->getOperand(1), NSW,
                         Mask, KnownZero, KnownOne, KnownZero2, KnownOne2,
                         TD, Depth);
    break;
  }
  case Instruction::UDiv: {
    // For the purposes of computing leading zeros we can conservatively
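This hunk deletes the inline Instruction::Mul analysis (everything up to the lone return;) and replaces it with a call to the new ComputeMaskedBitsMul helper, shrinking the case from 68 lines to 11. The sign-bit rules the helper preserves are easy to sanity-check outside LLVM; here is a small brute-force sketch (plain C++, illustrative only, not part of the patch) over all 8-bit products that do not overflow:

#include <cstdio>

int main() {
  // For nsw multiplies: same-sign operands give a non-negative product, and
  // a negative operand times a non-negative, non-zero operand gives a
  // negative product. Check every i8 pair whose product stays in range.
  for (int a = -128; a < 128; ++a) {
    for (int b = -128; b < 128; ++b) {
      int wide = a * b;
      if (wide < -128 || wide > 127) continue;  // overflow: nsw does not apply
      bool sameSign = (a < 0 && b < 0) || (a >= 0 && b >= 0);
      if (sameSign && wide < 0) std::puts("same-sign rule violated");
      bool negTimesPos = (a < 0 && b >= 0 && b != 0) ||
                         (b < 0 && a >= 0 && a != 0);
      if (negTimesPos && wide >= 0) std::puts("neg*pos rule violated");
    }
  }
  std::puts("sign rules hold for all non-overflowing i8 products");
  return 0;
}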
@@ -777,6 +785,12 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
                                 KnownZero, KnownOne, KnownZero2, KnownOne2,
                                 TD, Depth);
        break;
      case Intrinsic::umul_with_overflow:
      case Intrinsic::smul_with_overflow:
        ComputeMaskedBitsMul(II->getArgOperand(0), II->getArgOperand(1),
                             false, Mask, KnownZero, KnownOne,
                             KnownZero2, KnownOne2, TD, Depth);
        break;
      }
    }
  }
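The overflow-checking intrinsics return the wrapped product together with an overflow flag, which is why the helper is called here with NSW=false: only wrap-independent facts, such as trailing zero bits, may be claimed for the first result element. A rough standalone analogue (plain C++ using the GCC/Clang __builtin_mul_overflow builtin; the concrete operands are made up for illustration):

#include <cstdint>
#include <cstdio>

int main() {
  // Mirrors the llvm.smul.with.overflow semantics: the product wraps and a
  // separate flag reports overflow, so no-signed-wrap cannot be assumed.
  int32_t a = 0x12345670, b = 0x100, prod = 0;
  bool ovf = __builtin_mul_overflow(a, b, &prod);  // GCC/Clang builtin
  // a is a multiple of 16 and b a multiple of 256, so the wrapped product
  // still has at least 12 trailing zero bits even though the multiply
  // overflows i32.
  std::printf("prod=0x%x overflow=%d low12=0x%x\n",
              (unsigned)prod, (int)ovf, prod & 0xfff);
  return 0;
}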
@@ -1,116 +1,184 @@
; This test makes sure that mul instructions are properly eliminated.
; RUN: opt < %s -instcombine -S | not grep mul
; RUN: opt < %s -instcombine -S | FileCheck %s

define i32 @test1(i32 %A) {
; CHECK: @test1
  %B = mul i32 %A, 1              ; <i32> [#uses=1]
  ret i32 %B
; CHECK: ret i32 %A
}

define i32 @test2(i32 %A) {
; CHECK: @test2
  ; Should convert to an add instruction
  %B = mul i32 %A, 2              ; <i32> [#uses=1]
  ret i32 %B
; CHECK: shl i32 %A, 1
}

define i32 @test3(i32 %A) {
; CHECK: @test3
  ; This should disappear entirely
  %B = mul i32 %A, 0              ; <i32> [#uses=1]
  ret i32 %B
; CHECK: ret i32 0
}

define double @test4(double %A) {
; CHECK: @test4
  ; This is safe for FP
  %B = fmul double 1.000000e+00, %A   ; <double> [#uses=1]
  ret double %B
; CHECK: ret double %A
}

define i32 @test5(i32 %A) {
; CHECK: @test5
  %B = mul i32 %A, 8              ; <i32> [#uses=1]
  ret i32 %B
; CHECK: shl i32 %A, 3
}

define i8 @test6(i8 %A) {
; CHECK: @test6
  %B = mul i8 %A, 8               ; <i8> [#uses=1]
  %C = mul i8 %B, 8               ; <i8> [#uses=1]
  ret i8 %C
; CHECK: shl i8 %A, 6
}

define i32 @test7(i32 %i) {
; CHECK: @test7
  %tmp = mul i32 %i, -1           ; <i32> [#uses=1]
  ret i32 %tmp
; CHECK: sub i32 0, %i
}

define i64 @test8(i64 %i) {
; tmp = sub 0, %i
; CHECK: @test8
  %j = mul i64 %i, -1             ; <i64> [#uses=1]
  ret i64 %j
; CHECK: sub i64 0, %i
}

define i32 @test9(i32 %i) {
; %j = sub 0, %i
; CHECK: @test9
  %j = mul i32 %i, -1             ; <i32> [#uses=1]
  ret i32 %j
; CHECK: sub i32 0, %i
}

define i32 @test10(i32 %a, i32 %b) {
; CHECK: @test10
  %c = icmp slt i32 %a, 0         ; <i1> [#uses=1]
  %d = zext i1 %c to i32          ; <i32> [#uses=1]
  ; e = b & (a >> 31)
  %e = mul i32 %d, %b             ; <i32> [#uses=1]
  ret i32 %e
; CHECK: [[TEST10:%.*]] = ashr i32 %a, 31
; CHECK-NEXT: %e = and i32 [[TEST10]], %b
; CHECK-NEXT: ret i32 %e
}

define i32 @test11(i32 %a, i32 %b) {
; CHECK: @test11
  %c = icmp sle i32 %a, -1        ; <i1> [#uses=1]
  %d = zext i1 %c to i32          ; <i32> [#uses=1]
  ; e = b & (a >> 31)
  %e = mul i32 %d, %b             ; <i32> [#uses=1]
  ret i32 %e
; CHECK: [[TEST11:%.*]] = ashr i32 %a, 31
; CHECK-NEXT: %e = and i32 [[TEST11]], %b
; CHECK-NEXT: ret i32 %e
}

define i32 @test12(i8 %a, i32 %b) {
  %c = icmp ugt i8 %a, 127        ; <i1> [#uses=1]
define i32 @test12(i32 %a, i32 %b) {
; CHECK: @test12
  %c = icmp ugt i32 %a, 2147483647   ; <i1> [#uses=1]
  %d = zext i1 %c to i32          ; <i32> [#uses=1]
  ; e = b & (a >> 31)
  %e = mul i32 %d, %b             ; <i32> [#uses=1]
  ret i32 %e
; CHECK: [[TEST12:%.*]] = ashr i32 %a, 31
; CHECK-NEXT: %e = and i32 [[TEST12]], %b
; CHECK-NEXT: ret i32 %e

}

; PR2642
define internal void @test13(<4 x float>*) {
; CHECK: @test13
  load <4 x float>* %0, align 1
  fmul <4 x float> %2, < float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00 >
  store <4 x float> %3, <4 x float>* %0, align 1
  ret void
; CHECK-NEXT: ret void
}

define <16 x i8> @test14(<16 x i8> %a) {
; CHECK: @test14
  %b = mul <16 x i8> %a, zeroinitializer
  ret <16 x i8> %b
; CHECK-NEXT: ret <16 x i8> zeroinitializer
}

; rdar://7293527
define i32 @test15(i32 %A, i32 %B) {
; CHECK: @test15
entry:
  %shl = shl i32 1, %B
  %m = mul i32 %shl, %A
  ret i32 %m
; CHECK: shl i32 %A, %B
}

; X * Y (when Y is 0 or 1) --> x & (0-Y)
define i32 @test16(i32 %b, i1 %c) {
; CHECK: @test16
  %d = zext i1 %c to i32          ; <i32> [#uses=1]
  ; e = b & (a >> 31)
  %e = mul i32 %d, %b             ; <i32> [#uses=1]
  ret i32 %e
; CHECK: [[TEST16:%.*]] = sext i1 %c to i32
; CHECK-NEXT: %e = and i32 [[TEST16]], %b
; CHECK-NEXT: ret i32 %e
}

; X * Y (when Y is 0 or 1) --> x & (0-Y)
define i32 @test17(i32 %a, i32 %b) {
; CHECK: @test17
  %a.lobit = lshr i32 %a, 31
  %e = mul i32 %a.lobit, %b
  ret i32 %e
; CHECK: [[TEST17:%.*]] = ashr i32 %a, 31
; CHECK-NEXT: %e = and i32 [[TEST17]], %b
; CHECK-NEXT: ret i32 %e
}

define i32 @test18(i32 %A, i32 %B) {
; CHECK: @test18
  %C = and i32 %A, 1
  %D = and i32 %B, 1

  %E = mul i32 %C, %D
  %F = and i32 %E, 16
  ret i32 %F
; CHECK-NEXT: ret i32 0
}

declare {i32, i1} @llvm.smul.with.overflow.i32(i32, i32)
declare void @use(i1)

define i32 @test19(i32 %A, i32 %B) {
; CHECK: @test19
  %C = and i32 %A, 1
  %D = and i32 %B, 1

  ; It would be nice if we also started proving that this doesn't overflow.
  %E = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %C, i32 %D)
  %F = extractvalue {i32, i1} %E, 0
  %G = extractvalue {i32, i1} %E, 1
  call void @use(i1 %G)
  %H = and i32 %F, 16
  ret i32 %H
; CHECK: ret i32 0
}
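The new @test19 exercises exactly the intrinsic path added above: both operands are masked to a single bit, so every possible product is 0 or 1, bit 4 of the result is known zero, and instcombine can fold %H to 0. A tiny brute-force restatement of that argument (plain C++, illustrative only, not part of the patch):

#include <cstdio>

int main() {
  // Mirrors @test18/@test19: with both operands masked to one bit, every
  // possible product is 0 or 1, so "product & 16" is always 0 -- the fact
  // the new known-bits path proves for the multiply and the intrinsic.
  for (int A = 0; A < 4; ++A)
    for (int B = 0; B < 4; ++B) {
      int C = A & 1, D = B & 1;
      if (((C * D) & 16) != 0) std::puts("unexpected");
    }
  std::puts("(C*D) & 16 is 0 for all inputs");
  return 0;
}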