forked from OSchip/llvm-project
[ValueTracking] More accurate unsigned sub overflow detection
Second part of D58593. Compute precise overflow conditions based on all known bits, rather than just the sign bits. An unsigned a - b overflows iff a < b (compared as unsigned values), so we can determine whether this always or never happens by comparing the minimum and maximum values that a and b can take under their known-bits constraints: if min(a) >= max(b) the subtraction never overflows, and if max(a) < min(b) it always overflows. llvm-svn: 355109
This commit is contained in:
parent
ab10947b34
commit
af2b0bef43
|
@ -4165,18 +4165,16 @@ OverflowResult llvm::computeOverflowForUnsignedSub(const Value *LHS,
|
|||
const Instruction *CxtI,
|
||||
const DominatorTree *DT) {
|
||||
KnownBits LHSKnown = computeKnownBits(LHS, DL, /*Depth=*/0, AC, CxtI, DT);
|
||||
if (LHSKnown.isNonNegative() || LHSKnown.isNegative()) {
|
||||
KnownBits RHSKnown = computeKnownBits(RHS, DL, /*Depth=*/0, AC, CxtI, DT);
|
||||
|
||||
// If the LHS is negative and the RHS is non-negative, no unsigned wrap.
|
||||
if (LHSKnown.isNegative() && RHSKnown.isNonNegative())
|
||||
// a - b overflows iff a < b. Determine whether this is never/always true
|
||||
// based on the min/max values achievable under the known bits constraint.
|
||||
APInt MinLHS = LHSKnown.One, MaxLHS = ~LHSKnown.Zero;
|
||||
APInt MinRHS = RHSKnown.One, MaxRHS = ~RHSKnown.Zero;
|
||||
if (MinLHS.uge(MaxRHS))
|
||||
return OverflowResult::NeverOverflows;
|
||||
|
||||
// If the LHS is non-negative and the RHS negative, we always wrap.
|
||||
if (LHSKnown.isNonNegative() && RHSKnown.isNegative())
|
||||
if (MaxLHS.ult(MinRHS))
|
||||
return OverflowResult::AlwaysOverflows;
|
||||
}
|
||||
|
||||
return OverflowResult::MayOverflow;
|
||||
}
|
||||
|
||||
|
|
|
@ -11,7 +11,7 @@ define i32 @foo(i32 %arg) {
|
|||
; CHECK-NEXT: [[V35:%.*]] = add nuw nsw i32 [[V34]], 1362915575
|
||||
; CHECK-NEXT: [[V40:%.*]] = shl nuw nsw i32 [[V34]], 1
|
||||
; CHECK-NEXT: [[V41:%.*]] = and i32 [[V40]], 290
|
||||
; CHECK-NEXT: [[V42:%.*]] = sub nsw i32 [[V35]], [[V41]]
|
||||
; CHECK-NEXT: [[V42:%.*]] = sub nuw nsw i32 [[V35]], [[V41]]
|
||||
; CHECK-NEXT: [[V43:%.*]] = add nuw i32 [[V42]], 1533579450
|
||||
; CHECK-NEXT: [[V45:%.*]] = xor i32 [[V43]], 749011377
|
||||
; CHECK-NEXT: ret i32 [[V45]]
|
||||
|
|
|
@ -611,7 +611,7 @@ define <2 x i8> @test_vector_usub_nneg_nneg(<2 x i8> %a) {
|
|||
define i8 @test_scalar_usub_never_overflows(i8 %a) {
|
||||
; CHECK-LABEL: @test_scalar_usub_never_overflows(
|
||||
; CHECK-NEXT: [[A_MASKED:%.*]] = or i8 [[A:%.*]], 64
|
||||
; CHECK-NEXT: [[R:%.*]] = call i8 @llvm.usub.sat.i8(i8 [[A_MASKED]], i8 10)
|
||||
; CHECK-NEXT: [[R:%.*]] = add nsw i8 [[A_MASKED]], -10
|
||||
; CHECK-NEXT: ret i8 [[R]]
|
||||
;
|
||||
%a_masked = or i8 %a, 64
|
||||
|
@ -622,7 +622,7 @@ define i8 @test_scalar_usub_never_overflows(i8 %a) {
|
|||
define <2 x i8> @test_vector_usub_never_overflows(<2 x i8> %a) {
|
||||
; CHECK-LABEL: @test_vector_usub_never_overflows(
|
||||
; CHECK-NEXT: [[A_MASKED:%.*]] = or <2 x i8> [[A:%.*]], <i8 64, i8 64>
|
||||
; CHECK-NEXT: [[R:%.*]] = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> [[A_MASKED]], <2 x i8> <i8 10, i8 10>)
|
||||
; CHECK-NEXT: [[R:%.*]] = add nsw <2 x i8> [[A_MASKED]], <i8 -10, i8 -10>
|
||||
; CHECK-NEXT: ret <2 x i8> [[R]]
|
||||
;
|
||||
%a_masked = or <2 x i8> %a, <i8 64, i8 64>
|
||||
|
@ -632,9 +632,7 @@ define <2 x i8> @test_vector_usub_never_overflows(<2 x i8> %a) {
|
|||
|
||||
define i8 @test_scalar_usub_always_overflows(i8 %a) {
|
||||
; CHECK-LABEL: @test_scalar_usub_always_overflows(
|
||||
; CHECK-NEXT: [[A_MASKED:%.*]] = and i8 [[A:%.*]], 64
|
||||
; CHECK-NEXT: [[R:%.*]] = call i8 @llvm.usub.sat.i8(i8 [[A_MASKED]], i8 100)
|
||||
; CHECK-NEXT: ret i8 [[R]]
|
||||
; CHECK-NEXT: ret i8 0
|
||||
;
|
||||
%a_masked = and i8 %a, 64
|
||||
%r = call i8 @llvm.usub.sat.i8(i8 %a_masked, i8 100)
|
||||
|
@ -643,9 +641,7 @@ define i8 @test_scalar_usub_always_overflows(i8 %a) {
|
|||
|
||||
define <2 x i8> @test_vector_usub_always_overflows(<2 x i8> %a) {
|
||||
; CHECK-LABEL: @test_vector_usub_always_overflows(
|
||||
; CHECK-NEXT: [[A_MASKED:%.*]] = and <2 x i8> [[A:%.*]], <i8 64, i8 64>
|
||||
; CHECK-NEXT: [[R:%.*]] = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> [[A_MASKED]], <2 x i8> <i8 100, i8 100>)
|
||||
; CHECK-NEXT: ret <2 x i8> [[R]]
|
||||
; CHECK-NEXT: ret <2 x i8> zeroinitializer
|
||||
;
|
||||
%a_masked = and <2 x i8> %a, <i8 64, i8 64>
|
||||
%r = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> %a_masked, <2 x i8> <i8 100, i8 100>)
|
||||
|
|
|
@ -78,7 +78,7 @@ define i64 @test6(i32 %x) {
|
|||
define i64 @test7(i32 %x) {
|
||||
; CHECK-LABEL: @test7(
|
||||
; CHECK-NEXT: [[T:%.*]] = and i32 %x, 511
|
||||
; CHECK-NEXT: [[U:%.*]] = sub nsw i32 20000, [[T]]
|
||||
; CHECK-NEXT: [[U:%.*]] = sub nuw nsw i32 20000, [[T]]
|
||||
; CHECK-NEXT: [[S1:%.*]] = zext i32 [[U]] to i64
|
||||
; CHECK-NEXT: ret i64 [[S1]]
|
||||
;
|
||||
|
|
|
@ -128,7 +128,7 @@ define i32 @test_simplify10(i32 %x) {
|
|||
define i32 @test_simplify11(i32 %x) {
|
||||
; CHECK-LABEL: @test_simplify11(
|
||||
; CHECK-NEXT: [[AND:%.*]] = and i32 %x, 7
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = sub nsw i32 9, [[AND]]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = sub nuw nsw i32 9, [[AND]]
|
||||
; CHECK-NEXT: ret i32 [[TMP1]]
|
||||
;
|
||||
%and = and i32 %x, 7
|
||||
|
|
|
@ -39,7 +39,7 @@ define i32 @test2(i32 %x) nounwind {
|
|||
define i32 @test3(i32 %x) {
|
||||
; CHECK-LABEL: @test3(
|
||||
; CHECK-NEXT: [[AND:%.*]] = and i32 %x, 31
|
||||
; CHECK-NEXT: [[ADD:%.*]] = sub nsw i32 73, [[AND]]
|
||||
; CHECK-NEXT: [[ADD:%.*]] = sub nuw nsw i32 73, [[AND]]
|
||||
; CHECK-NEXT: ret i32 [[ADD]]
|
||||
;
|
||||
%and = and i32 %x, 31
|
||||
|
|
|
@ -130,9 +130,9 @@ define i64 @test_simplify10(i32 %x) {
|
|||
define i64 @test_simplify11(i32 %x) {
|
||||
; CHECK-LABEL: @test_simplify11(
|
||||
; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 7
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[AND]] to i64
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = sub nsw i64 9, [[TMP1]]
|
||||
; CHECK-NEXT: ret i64 [[TMP2]]
|
||||
; CHECK-NEXT: [[NARROW:%.*]] = sub nuw nsw i32 9, [[AND]]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[NARROW]] to i64
|
||||
; CHECK-NEXT: ret i64 [[TMP1]]
|
||||
;
|
||||
%and = and i32 %x, 7
|
||||
%hello_p = getelementptr inbounds [13 x i32], [13 x i32]* @null_hello_mid, i32 0, i32 %and
|
||||
|
|
|
@ -131,9 +131,9 @@ define i64 @test_simplify10(i16 %x) {
|
|||
define i64 @test_simplify11(i16 %x) {
|
||||
; CHECK-LABEL: @test_simplify11(
|
||||
; CHECK-NEXT: [[AND:%.*]] = and i16 [[X:%.*]], 7
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[AND]] to i64
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = sub nsw i64 9, [[TMP1]]
|
||||
; CHECK-NEXT: ret i64 [[TMP2]]
|
||||
; CHECK-NEXT: [[NARROW:%.*]] = sub nuw nsw i16 9, [[AND]]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[NARROW]] to i64
|
||||
; CHECK-NEXT: ret i64 [[TMP1]]
|
||||
;
|
||||
%and = and i16 %x, 7
|
||||
%hello_p = getelementptr inbounds [13 x i16], [13 x i16]* @null_hello_mid, i16 0, i16 %and
|
||||
|
|
|
@ -2028,7 +2028,7 @@ define void @foo4(double* %A, double* %B, i32* %trigger) {
|
|||
; AVX512-NEXT: br i1 [[TMP23]], label [[FOR_BODY_PREHEADER]], label [[VECTOR_BODY]], !llvm.loop !49
|
||||
; AVX512: for.body.preheader:
|
||||
; AVX512-NEXT: [[INDVARS_IV_PH:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ 9984, [[VECTOR_BODY]] ]
|
||||
; AVX512-NEXT: [[TMP24:%.*]] = sub nsw i64 9999, [[INDVARS_IV_PH]]
|
||||
; AVX512-NEXT: [[TMP24:%.*]] = sub nuw nsw i64 9999, [[INDVARS_IV_PH]]
|
||||
; AVX512-NEXT: br label [[FOR_BODY_PROL:%.*]]
|
||||
; AVX512: for.body.prol:
|
||||
; AVX512-NEXT: [[INDVARS_IV_PROL:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_PROL:%.*]], [[FOR_INC_PROL:%.*]] ], [ [[INDVARS_IV_PH]], [[FOR_BODY_PREHEADER]] ]
|
||||
|
|
Loading…
Reference in New Issue