[SCEVExpand] Only create required predicate checks.

Currently generateOverflowCheck always creates code for Step being
negative and positive, followed by a select at the end depending on
Step's sign.

This patch updates the code to only create either the checks for step
being positive or negative, if the sign is known.

Follow-up to D116696.

Reviewed By: reames

Differential Revision: https://reviews.llvm.org/D116747
This commit is contained in:
Florian Hahn 2022-01-07 14:44:03 +00:00
parent 7d9827f5cd
commit f395a4f8d5
No known key found for this signature in database
GPG Key ID: EEF712BB5E80EBBA
13 changed files with 91 additions and 183 deletions

View File

@ -2510,29 +2510,44 @@ Value *SCEVExpander::generateOverflowCheck(const SCEVAddRecExpr *AR,
}
// Compute:
// Start + |Step| * Backedge < Start
// Start - |Step| * Backedge > Start
// 1. Start + |Step| * Backedge < Start
// 2. Start - |Step| * Backedge > Start
//
// And select either 1. or 2. depending on whether step is positive or
// negative. If Step is known to be positive or negative, only create
// either 1. or 2.
Value *Add = nullptr, *Sub = nullptr;
bool NeedPosCheck = !SE.isKnownNegative(Step);
bool NeedNegCheck = !SE.isKnownPositive(Step);
if (PointerType *ARPtrTy = dyn_cast<PointerType>(ARTy)) {
StartValue = InsertNoopCastOfTo(
StartValue, Builder.getInt8PtrTy(ARPtrTy->getAddressSpace()));
Value *NegMulV = Builder.CreateNeg(MulV);
Add = Builder.CreateGEP(Builder.getInt8Ty(), StartValue, MulV);
Sub = Builder.CreateGEP(Builder.getInt8Ty(), StartValue, NegMulV);
if (NeedPosCheck)
Add = Builder.CreateGEP(Builder.getInt8Ty(), StartValue, MulV);
if (NeedNegCheck)
Sub = Builder.CreateGEP(Builder.getInt8Ty(), StartValue, NegMulV);
} else {
Add = Builder.CreateAdd(StartValue, MulV);
Sub = Builder.CreateSub(StartValue, MulV);
if (NeedPosCheck)
Add = Builder.CreateAdd(StartValue, MulV);
if (NeedNegCheck)
Sub = Builder.CreateSub(StartValue, MulV);
}
Value *EndCompareGT = Builder.CreateICmp(
Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT, Sub, StartValue);
Value *EndCompareLT = Builder.CreateICmp(
Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, Add, StartValue);
// Select the answer based on the sign of Step.
Value *EndCheck =
Builder.CreateSelect(StepCompare, EndCompareGT, EndCompareLT);
Value *EndCompareLT = nullptr;
Value *EndCompareGT = nullptr;
Value *EndCheck = nullptr;
if (NeedPosCheck)
EndCheck = EndCompareLT = Builder.CreateICmp(
Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, Add, StartValue);
if (NeedNegCheck)
EndCheck = EndCompareGT = Builder.CreateICmp(
Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT, Sub, StartValue);
if (NeedPosCheck && NeedNegCheck) {
// Select the answer based on the sign of Step.
EndCheck = Builder.CreateSelect(StepCompare, EndCompareGT, EndCompareLT);
}
// If the backedge taken count type is larger than the AR type,
// check that we don't drop any bits by truncating it. If we are

View File

@ -18,23 +18,17 @@ define void @f(i32* noalias %a, i32* noalias %b, i32* noalias %c, i32* noalias %
; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL1]], 0
; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL1]], 1
; CHECK-NEXT: [[TMP2:%.*]] = add i32 0, [[MUL_RESULT]]
; CHECK-NEXT: [[TMP3:%.*]] = sub i32 0, [[MUL_RESULT]]
; CHECK-NEXT: [[TMP4:%.*]] = icmp ugt i32 [[TMP3]], 0
; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP2]], 0
; CHECK-NEXT: [[TMP6:%.*]] = select i1 false, i1 [[TMP4]], i1 [[TMP5]]
; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt i64 [[TMP0]], 4294967295
; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]]
; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP5]], [[TMP7]]
; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP8]], [[MUL_OVERFLOW]]
; CHECK-NEXT: [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 8, i64 [[TMP0]])
; CHECK-NEXT: [[MUL_RESULT3:%.*]] = extractvalue { i64, i1 } [[MUL2]], 0
; CHECK-NEXT: [[MUL_OVERFLOW4:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1
; CHECK-NEXT: [[TMP11:%.*]] = sub i64 0, [[MUL_RESULT3]]
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i8, i8* [[A5]], i64 [[MUL_RESULT3]]
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, i8* [[A5]], i64 [[TMP11]]
; CHECK-NEXT: [[TMP14:%.*]] = icmp ugt i8* [[TMP13]], [[A5]]
; CHECK-NEXT: [[TMP15:%.*]] = icmp ult i8* [[TMP12]], [[A5]]
; CHECK-NEXT: [[TMP16:%.*]] = select i1 false, i1 [[TMP14]], i1 [[TMP15]]
; CHECK-NEXT: [[TMP17:%.*]] = or i1 [[TMP16]], [[MUL_OVERFLOW4]]
; CHECK-NEXT: [[TMP17:%.*]] = or i1 [[TMP15]], [[MUL_OVERFLOW4]]
; CHECK-NEXT: [[TMP18:%.*]] = or i1 [[TMP9]], [[TMP17]]
; CHECK-NEXT: br i1 [[TMP18]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH_LDIST1:%.*]]
; CHECK: for.body.ph.lver.orig:
@ -166,23 +160,17 @@ define void @f_with_offset(i32* noalias %b, i32* noalias %c, i32* noalias %d, i3
; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL1]], 0
; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL1]], 1
; CHECK-NEXT: [[TMP2:%.*]] = add i32 0, [[MUL_RESULT]]
; CHECK-NEXT: [[TMP3:%.*]] = sub i32 0, [[MUL_RESULT]]
; CHECK-NEXT: [[TMP4:%.*]] = icmp ugt i32 [[TMP3]], 0
; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP2]], 0
; CHECK-NEXT: [[TMP6:%.*]] = select i1 false, i1 [[TMP4]], i1 [[TMP5]]
; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt i64 [[TMP0]], 4294967295
; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]]
; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP5]], [[TMP7]]
; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP8]], [[MUL_OVERFLOW]]
; CHECK-NEXT: [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 8, i64 [[TMP0]])
; CHECK-NEXT: [[MUL_RESULT3:%.*]] = extractvalue { i64, i1 } [[MUL2]], 0
; CHECK-NEXT: [[MUL_OVERFLOW4:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1
; CHECK-NEXT: [[TMP11:%.*]] = sub i64 0, [[MUL_RESULT3]]
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i8, i8* bitcast (i32* getelementptr inbounds ([8192 x i32], [8192 x i32]* @global_a, i64 0, i64 42) to i8*), i64 [[MUL_RESULT3]]
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, i8* bitcast (i32* getelementptr inbounds ([8192 x i32], [8192 x i32]* @global_a, i64 0, i64 42) to i8*), i64 [[TMP11]]
; CHECK-NEXT: [[TMP14:%.*]] = icmp ugt i8* [[TMP13]], bitcast (i32* getelementptr inbounds ([8192 x i32], [8192 x i32]* @global_a, i64 0, i64 42) to i8*)
; CHECK-NEXT: [[TMP15:%.*]] = icmp ult i8* [[TMP12]], bitcast (i32* getelementptr inbounds ([8192 x i32], [8192 x i32]* @global_a, i64 0, i64 42) to i8*)
; CHECK-NEXT: [[TMP16:%.*]] = select i1 false, i1 [[TMP14]], i1 [[TMP15]]
; CHECK-NEXT: [[TMP17:%.*]] = or i1 [[TMP16]], [[MUL_OVERFLOW4]]
; CHECK-NEXT: [[TMP17:%.*]] = or i1 [[TMP15]], [[MUL_OVERFLOW4]]
; CHECK-NEXT: [[TMP18:%.*]] = or i1 [[TMP9]], [[TMP17]]
; CHECK-NEXT: br i1 [[TMP18]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH_LDIST1:%.*]]
; CHECK: for.body.ph.lver.orig:

View File

@ -541,13 +541,10 @@ define dso_local signext i32 @f2(float* noalias %A, float* noalias %B, i32 signe
; VF-TWO-CHECK-NEXT: [[MUL:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 1, i32 [[TMP2]])
; VF-TWO-CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL]], 0
; VF-TWO-CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL]], 1
; VF-TWO-CHECK-NEXT: [[TMP3:%.*]] = add i32 [[TMP0]], [[MUL_RESULT]]
; VF-TWO-CHECK-NEXT: [[TMP4:%.*]] = sub i32 [[TMP0]], [[MUL_RESULT]]
; VF-TWO-CHECK-NEXT: [[TMP5:%.*]] = icmp sgt i32 [[TMP4]], [[TMP0]]
; VF-TWO-CHECK-NEXT: [[TMP6:%.*]] = icmp slt i32 [[TMP3]], [[TMP0]]
; VF-TWO-CHECK-NEXT: [[TMP7:%.*]] = select i1 true, i1 [[TMP5]], i1 [[TMP6]]
; VF-TWO-CHECK-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[TMP1]], 4294967295
; VF-TWO-CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]]
; VF-TWO-CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP5]], [[TMP8]]
; VF-TWO-CHECK-NEXT: [[TMP10:%.*]] = or i1 [[TMP9]], [[MUL_OVERFLOW]]
; VF-TWO-CHECK-NEXT: br i1 [[TMP10]], label [[VEC_EPILOG_SCALAR_PH]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
; VF-TWO-CHECK: vector.main.loop.iter.check:
@ -771,13 +768,10 @@ define dso_local signext i32 @f2(float* noalias %A, float* noalias %B, i32 signe
; VF-FOUR-CHECK-NEXT: [[MUL:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 1, i32 [[TMP2]])
; VF-FOUR-CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL]], 0
; VF-FOUR-CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL]], 1
; VF-FOUR-CHECK-NEXT: [[TMP3:%.*]] = add i32 [[TMP0]], [[MUL_RESULT]]
; VF-FOUR-CHECK-NEXT: [[TMP4:%.*]] = sub i32 [[TMP0]], [[MUL_RESULT]]
; VF-FOUR-CHECK-NEXT: [[TMP5:%.*]] = icmp sgt i32 [[TMP4]], [[TMP0]]
; VF-FOUR-CHECK-NEXT: [[TMP6:%.*]] = icmp slt i32 [[TMP3]], [[TMP0]]
; VF-FOUR-CHECK-NEXT: [[TMP7:%.*]] = select i1 true, i1 [[TMP5]], i1 [[TMP6]]
; VF-FOUR-CHECK-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[TMP1]], 4294967295
; VF-FOUR-CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]]
; VF-FOUR-CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP5]], [[TMP8]]
; VF-FOUR-CHECK-NEXT: [[TMP10:%.*]] = or i1 [[TMP9]], [[MUL_OVERFLOW]]
; VF-FOUR-CHECK-NEXT: br i1 [[TMP10]], label [[VEC_EPILOG_SCALAR_PH]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
; VF-FOUR-CHECK: vector.main.loop.iter.check:

View File

@ -63,11 +63,8 @@ define void @foo(i32* nocapture %a, i32* nocapture %b, i32 %k, i32 %m) #0 {
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH]], label [[VECTOR_SCEVCHECK:%.*]]
; CHECK: vector.scevcheck:
; CHECK-NEXT: [[TMP10:%.*]] = add i32 [[TMP8]], [[TMP0]]
; CHECK-NEXT: [[TMP11:%.*]] = sub i32 [[TMP8]], [[TMP0]]
; CHECK-NEXT: [[TMP12:%.*]] = icmp sgt i32 [[TMP11]], [[TMP8]]
; CHECK-NEXT: [[TMP13:%.*]] = icmp slt i32 [[TMP10]], [[TMP8]]
; CHECK-NEXT: [[TMP14:%.*]] = select i1 false, i1 [[TMP12]], i1 [[TMP13]]
; CHECK-NEXT: br i1 [[TMP14]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; CHECK-NEXT: br i1 [[TMP13]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 4
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]]

View File

@ -52,13 +52,10 @@ define i32 @main() local_unnamed_addr #0 {
; CHECK-NEXT: [[MUL:%.*]] = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 1, i8 [[TMP10]])
; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i8, i1 } [[MUL]], 0
; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i8, i1 } [[MUL]], 1
; CHECK-NEXT: [[TMP11:%.*]] = add i8 [[TMP7]], [[MUL_RESULT]]
; CHECK-NEXT: [[TMP12:%.*]] = sub i8 [[TMP7]], [[MUL_RESULT]]
; CHECK-NEXT: [[TMP13:%.*]] = icmp ugt i8 [[TMP12]], [[TMP7]]
; CHECK-NEXT: [[TMP14:%.*]] = icmp ult i8 [[TMP11]], [[TMP7]]
; CHECK-NEXT: [[TMP15:%.*]] = select i1 true, i1 [[TMP13]], i1 [[TMP14]]
; CHECK-NEXT: [[TMP14:%.*]] = icmp ugt i8 [[TMP12]], [[TMP7]]
; CHECK-NEXT: [[TMP16:%.*]] = icmp ugt i32 [[TMP9]], 255
; CHECK-NEXT: [[TMP17:%.*]] = or i1 [[TMP15]], [[TMP16]]
; CHECK-NEXT: [[TMP17:%.*]] = or i1 [[TMP14]], [[TMP16]]
; CHECK-NEXT: [[TMP18:%.*]] = or i1 [[TMP17]], [[MUL_OVERFLOW]]
; CHECK-NEXT: br i1 [[TMP18]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:

View File

@ -646,11 +646,8 @@ define void @sink_dominance(i32* %ptr, i32 %N) {
; CHECK-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[N]], i32 1)
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[UMAX]], -1
; CHECK-NEXT: [[TMP1:%.*]] = add i32 0, [[TMP0]]
; CHECK-NEXT: [[TMP2:%.*]] = sub i32 0, [[TMP0]]
; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt i32 [[TMP2]], 0
; CHECK-NEXT: [[TMP4:%.*]] = icmp slt i32 [[TMP1]], 0
; CHECK-NEXT: [[TMP5:%.*]] = select i1 false, i1 [[TMP3]], i1 [[TMP4]]
; CHECK-NEXT: br i1 [[TMP5]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; CHECK-NEXT: br i1 [[TMP4]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[UMAX1]], 4
; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[UMAX1]], [[N_MOD_VF]]
@ -734,11 +731,8 @@ define void @sink_dominance_2(i32* %ptr, i32 %N) {
; CHECK-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[N]], i32 1)
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[UMAX]], -1
; CHECK-NEXT: [[TMP1:%.*]] = add i32 0, [[TMP0]]
; CHECK-NEXT: [[TMP2:%.*]] = sub i32 0, [[TMP0]]
; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt i32 [[TMP2]], 0
; CHECK-NEXT: [[TMP4:%.*]] = icmp slt i32 [[TMP1]], 0
; CHECK-NEXT: [[TMP5:%.*]] = select i1 false, i1 [[TMP3]], i1 [[TMP4]]
; CHECK-NEXT: br i1 [[TMP5]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; CHECK-NEXT: br i1 [[TMP4]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[UMAX1]], 4
; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[UMAX1]], [[N_MOD_VF]]

View File

@ -3557,20 +3557,14 @@ define void @wrappingindvars1(i8 %t, i32 %len, i32 *%A) {
; CHECK: vector.scevcheck:
; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[LEN]] to i8
; CHECK-NEXT: [[TMP2:%.*]] = add i8 [[T]], [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = sub i8 [[T]], [[TMP1]]
; CHECK-NEXT: [[TMP4:%.*]] = icmp ugt i8 [[TMP3]], [[T]]
; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i8 [[TMP2]], [[T]]
; CHECK-NEXT: [[TMP6:%.*]] = select i1 false, i1 [[TMP4]], i1 [[TMP5]]
; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[LEN]], 255
; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]]
; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP5]], [[TMP7]]
; CHECK-NEXT: [[TMP10:%.*]] = trunc i32 [[LEN]] to i8
; CHECK-NEXT: [[TMP11:%.*]] = add i8 [[T]], [[TMP10]]
; CHECK-NEXT: [[TMP12:%.*]] = sub i8 [[T]], [[TMP10]]
; CHECK-NEXT: [[TMP13:%.*]] = icmp sgt i8 [[TMP12]], [[T]]
; CHECK-NEXT: [[TMP14:%.*]] = icmp slt i8 [[TMP11]], [[T]]
; CHECK-NEXT: [[TMP15:%.*]] = select i1 false, i1 [[TMP13]], i1 [[TMP14]]
; CHECK-NEXT: [[TMP16:%.*]] = icmp ugt i32 [[LEN]], 255
; CHECK-NEXT: [[TMP17:%.*]] = or i1 [[TMP15]], [[TMP16]]
; CHECK-NEXT: [[TMP17:%.*]] = or i1 [[TMP14]], [[TMP16]]
; CHECK-NEXT: [[TMP18:%.*]] = or i1 [[TMP8]], [[TMP17]]
; CHECK-NEXT: br i1 [[TMP18]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
@ -3784,20 +3778,14 @@ define void @wrappingindvars1(i8 %t, i32 %len, i32 *%A) {
; UNROLL-NO-IC: vector.scevcheck:
; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = trunc i32 [[LEN]] to i8
; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = add i8 [[T]], [[TMP1]]
; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = sub i8 [[T]], [[TMP1]]
; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = icmp ugt i8 [[TMP3]], [[T]]
; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = icmp ult i8 [[TMP2]], [[T]]
; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = select i1 false, i1 [[TMP4]], i1 [[TMP5]]
; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[LEN]], 255
; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]]
; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = or i1 [[TMP5]], [[TMP7]]
; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = trunc i32 [[LEN]] to i8
; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = add i8 [[T]], [[TMP10]]
; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = sub i8 [[T]], [[TMP10]]
; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = icmp sgt i8 [[TMP12]], [[T]]
; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] = icmp slt i8 [[TMP11]], [[T]]
; UNROLL-NO-IC-NEXT: [[TMP15:%.*]] = select i1 false, i1 [[TMP13]], i1 [[TMP14]]
; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = icmp ugt i32 [[LEN]], 255
; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = or i1 [[TMP15]], [[TMP16]]
; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = or i1 [[TMP14]], [[TMP16]]
; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = or i1 [[TMP8]], [[TMP17]]
; UNROLL-NO-IC-NEXT: br i1 [[TMP18]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; UNROLL-NO-IC: vector.ph:
@ -3984,20 +3972,14 @@ define void @wrappingindvars2(i8 %t, i32 %len, i32 *%A) {
; CHECK: vector.scevcheck:
; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[LEN]] to i8
; CHECK-NEXT: [[TMP2:%.*]] = add i8 [[T]], [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = sub i8 [[T]], [[TMP1]]
; CHECK-NEXT: [[TMP4:%.*]] = icmp ugt i8 [[TMP3]], [[T]]
; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i8 [[TMP2]], [[T]]
; CHECK-NEXT: [[TMP6:%.*]] = select i1 false, i1 [[TMP4]], i1 [[TMP5]]
; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[LEN]], 255
; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]]
; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP5]], [[TMP7]]
; CHECK-NEXT: [[TMP10:%.*]] = trunc i32 [[LEN]] to i8
; CHECK-NEXT: [[TMP11:%.*]] = add i8 [[T]], [[TMP10]]
; CHECK-NEXT: [[TMP12:%.*]] = sub i8 [[T]], [[TMP10]]
; CHECK-NEXT: [[TMP13:%.*]] = icmp sgt i8 [[TMP12]], [[T]]
; CHECK-NEXT: [[TMP14:%.*]] = icmp slt i8 [[TMP11]], [[T]]
; CHECK-NEXT: [[TMP15:%.*]] = select i1 false, i1 [[TMP13]], i1 [[TMP14]]
; CHECK-NEXT: [[TMP16:%.*]] = icmp ugt i32 [[LEN]], 255
; CHECK-NEXT: [[TMP17:%.*]] = or i1 [[TMP15]], [[TMP16]]
; CHECK-NEXT: [[TMP17:%.*]] = or i1 [[TMP14]], [[TMP16]]
; CHECK-NEXT: [[TMP18:%.*]] = or i1 [[TMP8]], [[TMP17]]
; CHECK-NEXT: br i1 [[TMP18]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
@ -4221,20 +4203,14 @@ define void @wrappingindvars2(i8 %t, i32 %len, i32 *%A) {
; UNROLL-NO-IC: vector.scevcheck:
; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = trunc i32 [[LEN]] to i8
; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = add i8 [[T]], [[TMP1]]
; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = sub i8 [[T]], [[TMP1]]
; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = icmp ugt i8 [[TMP3]], [[T]]
; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = icmp ult i8 [[TMP2]], [[T]]
; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = select i1 false, i1 [[TMP4]], i1 [[TMP5]]
; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[LEN]], 255
; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]]
; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = or i1 [[TMP5]], [[TMP7]]
; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = trunc i32 [[LEN]] to i8
; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = add i8 [[T]], [[TMP10]]
; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = sub i8 [[T]], [[TMP10]]
; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = icmp sgt i8 [[TMP12]], [[T]]
; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] = icmp slt i8 [[TMP11]], [[T]]
; UNROLL-NO-IC-NEXT: [[TMP15:%.*]] = select i1 false, i1 [[TMP13]], i1 [[TMP14]]
; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = icmp ugt i32 [[LEN]], 255
; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = or i1 [[TMP15]], [[TMP16]]
; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = or i1 [[TMP14]], [[TMP16]]
; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = or i1 [[TMP8]], [[TMP17]]
; UNROLL-NO-IC-NEXT: br i1 [[TMP18]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; UNROLL-NO-IC: vector.ph:
@ -4634,12 +4610,9 @@ define void @trunciv(i32* nocapture %a, i32 %start, i64 %k) {
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[K]], -1
; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
; CHECK-NEXT: [[TMP2:%.*]] = add i32 0, [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = sub i32 0, [[TMP1]]
; CHECK-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[TMP3]], 0
; CHECK-NEXT: [[TMP5:%.*]] = icmp slt i32 [[TMP2]], 0
; CHECK-NEXT: [[TMP6:%.*]] = select i1 false, i1 [[TMP4]], i1 [[TMP5]]
; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt i64 [[TMP0]], 4294967295
; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]]
; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP5]], [[TMP7]]
; CHECK-NEXT: br i1 [[TMP8]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[K]], 2
@ -4774,12 +4747,9 @@ define void @trunciv(i32* nocapture %a, i32 %start, i64 %k) {
; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = add i64 [[K]], -1
; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = add i32 0, [[TMP1]]
; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = sub i32 0, [[TMP1]]
; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[TMP3]], 0
; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = icmp slt i32 [[TMP2]], 0
; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = select i1 false, i1 [[TMP4]], i1 [[TMP5]]
; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = icmp ugt i64 [[TMP0]], 4294967295
; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]]
; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = or i1 [[TMP5]], [[TMP7]]
; UNROLL-NO-IC-NEXT: br i1 [[TMP8]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; UNROLL-NO-IC: vector.ph:
; UNROLL-NO-IC-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[K]], 4
@ -6549,9 +6519,9 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n
; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i8, i1 } [[MUL]], 1
; CHECK-NEXT: [[TMP6:%.*]] = add i8 0, [[MUL_RESULT]]
; CHECK-NEXT: [[TMP7:%.*]] = sub i8 0, [[MUL_RESULT]]
; CHECK-NEXT: [[TMP8:%.*]] = icmp sgt i8 [[TMP7]], 0
; CHECK-NEXT: [[TMP9:%.*]] = icmp slt i8 [[TMP6]], 0
; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP3]], i1 [[TMP8]], i1 [[TMP9]]
; CHECK-NEXT: [[TMP8:%.*]] = icmp slt i8 [[TMP6]], 0
; CHECK-NEXT: [[TMP9:%.*]] = icmp sgt i8 [[TMP7]], 0
; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP3]], i1 [[TMP9]], i1 [[TMP8]]
; CHECK-NEXT: [[TMP11:%.*]] = icmp ugt i64 [[TMP0]], 255
; CHECK-NEXT: [[TMP12:%.*]] = icmp ne i8 [[TMP1]], 0
; CHECK-NEXT: [[TMP13:%.*]] = and i1 [[TMP11]], [[TMP12]]
@ -6627,9 +6597,9 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n
; IND-NEXT: [[MUL:%.*]] = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 [[TMP4]], i8 [[TMP5]])
; IND-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i8, i1 } [[MUL]], 0
; IND-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i8, i1 } [[MUL]], 1
; IND-NEXT: [[TMP6:%.*]] = icmp ugt i8 [[MUL_RESULT]], -128
; IND-NEXT: [[TMP7:%.*]] = icmp slt i8 [[MUL_RESULT]], 0
; IND-NEXT: [[TMP8:%.*]] = select i1 [[TMP3]], i1 [[TMP6]], i1 [[TMP7]]
; IND-NEXT: [[TMP6:%.*]] = icmp slt i8 [[MUL_RESULT]], 0
; IND-NEXT: [[TMP7:%.*]] = icmp ugt i8 [[MUL_RESULT]], -128
; IND-NEXT: [[TMP8:%.*]] = select i1 [[TMP3]], i1 [[TMP7]], i1 [[TMP6]]
; IND-NEXT: [[TMP9:%.*]] = icmp ugt i64 [[TMP0]], 255
; IND-NEXT: [[TMP10:%.*]] = icmp ne i8 [[TMP1]], 0
; IND-NEXT: [[TMP11:%.*]] = and i1 [[TMP9]], [[TMP10]]
@ -6700,9 +6670,9 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n
; UNROLL-NEXT: [[MUL:%.*]] = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 [[TMP4]], i8 [[TMP5]])
; UNROLL-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i8, i1 } [[MUL]], 0
; UNROLL-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i8, i1 } [[MUL]], 1
; UNROLL-NEXT: [[TMP6:%.*]] = icmp ugt i8 [[MUL_RESULT]], -128
; UNROLL-NEXT: [[TMP7:%.*]] = icmp slt i8 [[MUL_RESULT]], 0
; UNROLL-NEXT: [[TMP8:%.*]] = select i1 [[TMP3]], i1 [[TMP6]], i1 [[TMP7]]
; UNROLL-NEXT: [[TMP6:%.*]] = icmp slt i8 [[MUL_RESULT]], 0
; UNROLL-NEXT: [[TMP7:%.*]] = icmp ugt i8 [[MUL_RESULT]], -128
; UNROLL-NEXT: [[TMP8:%.*]] = select i1 [[TMP3]], i1 [[TMP7]], i1 [[TMP6]]
; UNROLL-NEXT: [[TMP9:%.*]] = icmp ugt i64 [[TMP0]], 255
; UNROLL-NEXT: [[TMP10:%.*]] = icmp ne i8 [[TMP1]], 0
; UNROLL-NEXT: [[TMP11:%.*]] = and i1 [[TMP9]], [[TMP10]]
@ -6780,9 +6750,9 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n
; UNROLL-NO-IC-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i8, i1 } [[MUL]], 1
; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = add i8 0, [[MUL_RESULT]]
; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = sub i8 0, [[MUL_RESULT]]
; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = icmp sgt i8 [[TMP7]], 0
; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = icmp slt i8 [[TMP6]], 0
; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = select i1 [[TMP3]], i1 [[TMP8]], i1 [[TMP9]]
; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = icmp slt i8 [[TMP6]], 0
; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = icmp sgt i8 [[TMP7]], 0
; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = select i1 [[TMP3]], i1 [[TMP9]], i1 [[TMP8]]
; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = icmp ugt i64 [[TMP0]], 255
; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = icmp ne i8 [[TMP1]], 0
; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = and i1 [[TMP11]], [[TMP12]]
@ -6865,9 +6835,9 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n
; INTERLEAVE-NEXT: [[MUL:%.*]] = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 [[TMP4]], i8 [[TMP5]])
; INTERLEAVE-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i8, i1 } [[MUL]], 0
; INTERLEAVE-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i8, i1 } [[MUL]], 1
; INTERLEAVE-NEXT: [[TMP6:%.*]] = icmp ugt i8 [[MUL_RESULT]], -128
; INTERLEAVE-NEXT: [[TMP7:%.*]] = icmp slt i8 [[MUL_RESULT]], 0
; INTERLEAVE-NEXT: [[TMP8:%.*]] = select i1 [[TMP3]], i1 [[TMP6]], i1 [[TMP7]]
; INTERLEAVE-NEXT: [[TMP6:%.*]] = icmp slt i8 [[MUL_RESULT]], 0
; INTERLEAVE-NEXT: [[TMP7:%.*]] = icmp ugt i8 [[MUL_RESULT]], -128
; INTERLEAVE-NEXT: [[TMP8:%.*]] = select i1 [[TMP3]], i1 [[TMP7]], i1 [[TMP6]]
; INTERLEAVE-NEXT: [[TMP9:%.*]] = icmp ugt i64 [[TMP0]], 255
; INTERLEAVE-NEXT: [[TMP10:%.*]] = icmp ne i8 [[TMP1]], 0
; INTERLEAVE-NEXT: [[TMP11:%.*]] = and i1 [[TMP9]], [[TMP10]]

View File

@ -171,13 +171,10 @@ define dso_local signext i32 @f2(float* noalias %A, float* noalias %B, i32 signe
; CHECK-NEXT: [[MUL:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 1, i32 [[TMP2]])
; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL]], 0
; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL]], 1
; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[TMP0]], [[MUL_RESULT]]
; CHECK-NEXT: [[TMP4:%.*]] = sub i32 [[TMP0]], [[MUL_RESULT]]
; CHECK-NEXT: [[TMP5:%.*]] = icmp sgt i32 [[TMP4]], [[TMP0]]
; CHECK-NEXT: [[TMP6:%.*]] = icmp slt i32 [[TMP3]], [[TMP0]]
; CHECK-NEXT: [[TMP7:%.*]] = select i1 true, i1 [[TMP5]], i1 [[TMP6]]
; CHECK-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[TMP1]], 4294967295
; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]]
; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP5]], [[TMP8]]
; CHECK-NEXT: [[TMP10:%.*]] = or i1 [[TMP9]], [[MUL_OVERFLOW]]
; CHECK-NEXT: br i1 [[TMP10]], label [[VEC_EPILOG_SCALAR_PH]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
; CHECK: vector.main.loop.iter.check:

View File

@ -53,9 +53,9 @@ define void @doit1(i32 %n, i32 %step) local_unnamed_addr {
; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i8, i1 } [[MUL]], 1
; CHECK-NEXT: [[TMP6:%.*]] = add i8 0, [[MUL_RESULT]]
; CHECK-NEXT: [[TMP7:%.*]] = sub i8 0, [[MUL_RESULT]]
; CHECK-NEXT: [[TMP8:%.*]] = icmp sgt i8 [[TMP7]], 0
; CHECK-NEXT: [[TMP9:%.*]] = icmp slt i8 [[TMP6]], 0
; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP3]], i1 [[TMP8]], i1 [[TMP9]]
; CHECK-NEXT: [[TMP8:%.*]] = icmp slt i8 [[TMP6]], 0
; CHECK-NEXT: [[TMP9:%.*]] = icmp sgt i8 [[TMP7]], 0
; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP3]], i1 [[TMP9]], i1 [[TMP8]]
; CHECK-NEXT: [[TMP11:%.*]] = icmp ugt i64 [[TMP0]], 255
; CHECK-NEXT: [[TMP12:%.*]] = icmp ne i8 [[TMP1]], 0
; CHECK-NEXT: [[TMP13:%.*]] = and i1 [[TMP11]], [[TMP12]]
@ -179,9 +179,9 @@ define void @doit2(i32 %n, i32 %step) local_unnamed_addr {
; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i8, i1 } [[MUL]], 1
; CHECK-NEXT: [[TMP6:%.*]] = add i8 0, [[MUL_RESULT]]
; CHECK-NEXT: [[TMP7:%.*]] = sub i8 0, [[MUL_RESULT]]
; CHECK-NEXT: [[TMP8:%.*]] = icmp ugt i8 [[TMP7]], 0
; CHECK-NEXT: [[TMP9:%.*]] = icmp ult i8 [[TMP6]], 0
; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP3]], i1 [[TMP8]], i1 [[TMP9]]
; CHECK-NEXT: [[TMP8:%.*]] = icmp ult i8 [[TMP6]], 0
; CHECK-NEXT: [[TMP9:%.*]] = icmp ugt i8 [[TMP7]], 0
; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP3]], i1 [[TMP9]], i1 [[TMP8]]
; CHECK-NEXT: [[TMP11:%.*]] = icmp ugt i64 [[TMP0]], 255
; CHECK-NEXT: [[TMP12:%.*]] = icmp ne i8 [[TMP1]], 0
; CHECK-NEXT: [[TMP13:%.*]] = and i1 [[TMP11]], [[TMP12]]
@ -379,9 +379,9 @@ define void @doit4(i32 %n, i8 signext %cstep) local_unnamed_addr {
; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i8, i1 } [[MUL]], 1
; CHECK-NEXT: [[TMP5:%.*]] = add i8 0, [[MUL_RESULT]]
; CHECK-NEXT: [[TMP6:%.*]] = sub i8 0, [[MUL_RESULT]]
; CHECK-NEXT: [[TMP7:%.*]] = icmp sgt i8 [[TMP6]], 0
; CHECK-NEXT: [[TMP8:%.*]] = icmp slt i8 [[TMP5]], 0
; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP2]], i1 [[TMP7]], i1 [[TMP8]]
; CHECK-NEXT: [[TMP7:%.*]] = icmp slt i8 [[TMP5]], 0
; CHECK-NEXT: [[TMP8:%.*]] = icmp sgt i8 [[TMP6]], 0
; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP2]], i1 [[TMP8]], i1 [[TMP7]]
; CHECK-NEXT: [[TMP10:%.*]] = icmp ugt i64 [[TMP0]], 255
; CHECK-NEXT: [[TMP11:%.*]] = icmp ne i8 [[CSTEP]], 0
; CHECK-NEXT: [[TMP12:%.*]] = and i1 [[TMP10]], [[TMP11]]

View File

@ -25,12 +25,9 @@ define i8 @widget(i8* %arr, i8 %t9) {
; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], [[ARR2]]
; CHECK-NEXT: [[TMP5:%.*]] = trunc i64 [[TMP4]] to i8
; CHECK-NEXT: [[TMP6:%.*]] = add i8 1, [[TMP5]]
; CHECK-NEXT: [[TMP7:%.*]] = sub i8 1, [[TMP5]]
; CHECK-NEXT: [[TMP8:%.*]] = icmp sgt i8 [[TMP7]], 1
; CHECK-NEXT: [[TMP9:%.*]] = icmp slt i8 [[TMP6]], 1
; CHECK-NEXT: [[TMP10:%.*]] = select i1 false, i1 [[TMP8]], i1 [[TMP9]]
; CHECK-NEXT: [[TMP11:%.*]] = icmp ugt i64 [[TMP4]], 255
; CHECK-NEXT: [[TMP12:%.*]] = or i1 [[TMP10]], [[TMP11]]
; CHECK-NEXT: [[TMP12:%.*]] = or i1 [[TMP9]], [[TMP11]]
; CHECK-NEXT: br i1 [[TMP12]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP2]], 4

View File

@ -21,12 +21,9 @@ define void @load_clamped_index(i32* %A, i32* %B, i32 %N) {
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1
; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[TMP0]] to i2
; CHECK-NEXT: [[TMP2:%.*]] = add i2 0, [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = sub i2 0, [[TMP1]]
; CHECK-NEXT: [[TMP4:%.*]] = icmp ugt i2 [[TMP3]], 0
; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i2 [[TMP2]], 0
; CHECK-NEXT: [[TMP6:%.*]] = select i1 false, i1 [[TMP4]], i1 [[TMP5]]
; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[TMP0]], 3
; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]]
; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP5]], [[TMP7]]
; CHECK-NEXT: br i1 [[TMP8]], label [[SCALAR_PH]], label [[VECTOR_MEMCHECK:%.*]]
; CHECK: vector.memcheck:
; CHECK-NEXT: [[TMP10:%.*]] = add i32 [[N]], -1
@ -111,12 +108,9 @@ define void @store_clamped_index(i32* %A, i32* %B, i32 %N) {
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1
; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[TMP0]] to i2
; CHECK-NEXT: [[TMP2:%.*]] = add i2 0, [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = sub i2 0, [[TMP1]]
; CHECK-NEXT: [[TMP4:%.*]] = icmp ugt i2 [[TMP3]], 0
; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i2 [[TMP2]], 0
; CHECK-NEXT: [[TMP6:%.*]] = select i1 false, i1 [[TMP4]], i1 [[TMP5]]
; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[TMP0]], 3
; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]]
; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP5]], [[TMP7]]
; CHECK-NEXT: br i1 [[TMP8]], label [[SCALAR_PH]], label [[VECTOR_MEMCHECK:%.*]]
; CHECK: vector.memcheck:
; CHECK-NEXT: [[TMP10:%.*]] = add i32 [[N]], -1
@ -280,12 +274,9 @@ define void @clamped_index_equal_dependence(i32* %A, i32* %B, i32 %N) {
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1
; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[TMP0]] to i2
; CHECK-NEXT: [[TMP2:%.*]] = add i2 0, [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = sub i2 0, [[TMP1]]
; CHECK-NEXT: [[TMP4:%.*]] = icmp ugt i2 [[TMP3]], 0
; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i2 [[TMP2]], 0
; CHECK-NEXT: [[TMP6:%.*]] = select i1 false, i1 [[TMP4]], i1 [[TMP5]]
; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[TMP0]], 3
; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]]
; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP5]], [[TMP7]]
; CHECK-NEXT: br i1 [[TMP8]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N]], 2

View File

@ -18,10 +18,8 @@ define void @"japi1_align!_9477"(%jl_value_t addrspace(10)** %arg) {
; LV-NEXT: [[OFMulResult:%[^ ]*]] = extractvalue { i64, i1 } [[OFMul]], 0
; LV-NEXT: [[OFMulOverflow:%[^ ]*]] = extractvalue { i64, i1 } [[OFMul]], 1
; LV: [[OFNegMulResult:%[^ ]*]] = sub i64 0, [[OFMulResult]]
; LV-NEXT: [[PosGEP:%[^ ]*]] = getelementptr i8, i8 addrspace(13)* [[Base:%[^ ]*]], i64 [[OFMulResult]]
; LV-NEXT: [[NegGEP:%[^ ]*]] = getelementptr i8, i8 addrspace(13)* [[Base]], i64 [[OFNegMulResult]]
; LV-NEXT: [[NegGEP:%[^ ]*]] = getelementptr i8, i8 addrspace(13)* [[Base:%[^ ]*]], i64 [[OFNegMulResult]]
; LV-NEXT: icmp ugt i8 addrspace(13)* [[NegGEP]], [[Base]]
; LV-NEXT: icmp ult i8 addrspace(13)* [[PosGEP]], [[Base]]
; LV-NOT: inttoptr
; LV-NOT: ptrtoint
top:

View File

@ -35,23 +35,17 @@ define void @f1(i16* noalias %a,
; LV-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL1]], 0
; LV-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL1]], 1
; LV-NEXT: [[TMP2:%.*]] = add i32 0, [[MUL_RESULT]]
; LV-NEXT: [[TMP3:%.*]] = sub i32 0, [[MUL_RESULT]]
; LV-NEXT: [[TMP4:%.*]] = icmp ugt i32 [[TMP3]], 0
; LV-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP2]], 0
; LV-NEXT: [[TMP6:%.*]] = select i1 false, i1 [[TMP4]], i1 [[TMP5]]
; LV-NEXT: [[TMP7:%.*]] = icmp ugt i64 [[TMP0]], 4294967295
; LV-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]]
; LV-NEXT: [[TMP8:%.*]] = or i1 [[TMP5]], [[TMP7]]
; LV-NEXT: [[TMP9:%.*]] = or i1 [[TMP8]], [[MUL_OVERFLOW]]
; LV-NEXT: [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[TMP0]])
; LV-NEXT: [[MUL_RESULT3:%.*]] = extractvalue { i64, i1 } [[MUL2]], 0
; LV-NEXT: [[MUL_OVERFLOW4:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1
; LV-NEXT: [[TMP11:%.*]] = sub i64 0, [[MUL_RESULT3]]
; LV-NEXT: [[TMP12:%.*]] = getelementptr i8, i8* [[A5]], i64 [[MUL_RESULT3]]
; LV-NEXT: [[TMP13:%.*]] = getelementptr i8, i8* [[A5]], i64 [[TMP11]]
; LV-NEXT: [[TMP14:%.*]] = icmp ugt i8* [[TMP13]], [[A5]]
; LV-NEXT: [[TMP15:%.*]] = icmp ult i8* [[TMP12]], [[A5]]
; LV-NEXT: [[TMP16:%.*]] = select i1 false, i1 [[TMP14]], i1 [[TMP15]]
; LV-NEXT: [[TMP17:%.*]] = or i1 [[TMP16]], [[MUL_OVERFLOW4]]
; LV-NEXT: [[TMP17:%.*]] = or i1 [[TMP15]], [[MUL_OVERFLOW4]]
; LV-NEXT: [[TMP18:%.*]] = or i1 [[TMP9]], [[TMP17]]
; LV-NEXT: br i1 [[TMP18]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH:%.*]]
; LV: for.body.ph.lver.orig:
@ -160,13 +154,10 @@ define void @f2(i16* noalias %a,
; LV-NEXT: [[MUL1:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 2, i32 [[TMP2]])
; LV-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL1]], 0
; LV-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL1]], 1
; LV-NEXT: [[TMP3:%.*]] = add i32 [[TMP1]], [[MUL_RESULT]]
; LV-NEXT: [[TMP4:%.*]] = sub i32 [[TMP1]], [[MUL_RESULT]]
; LV-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP4]], [[TMP1]]
; LV-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP3]], [[TMP1]]
; LV-NEXT: [[TMP7:%.*]] = select i1 true, i1 [[TMP5]], i1 [[TMP6]]
; LV-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[TMP0]], 4294967295
; LV-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]]
; LV-NEXT: [[TMP9:%.*]] = or i1 [[TMP5]], [[TMP8]]
; LV-NEXT: [[TMP10:%.*]] = or i1 [[TMP9]], [[MUL_OVERFLOW]]
; LV-NEXT: [[TMP12:%.*]] = trunc i64 [[N]] to i31
; LV-NEXT: [[TMP13:%.*]] = zext i31 [[TMP12]] to i64
@ -177,12 +168,9 @@ define void @f2(i16* noalias %a,
; LV-NEXT: [[MUL_OVERFLOW4:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1
; LV-NEXT: [[SCEVGEP5:%.*]] = bitcast i16* [[SCEVGEP]] to i8*
; LV-NEXT: [[TMP15:%.*]] = sub i64 0, [[MUL_RESULT3]]
; LV-NEXT: [[TMP16:%.*]] = getelementptr i8, i8* [[SCEVGEP5]], i64 [[MUL_RESULT3]]
; LV-NEXT: [[TMP17:%.*]] = getelementptr i8, i8* [[SCEVGEP5]], i64 [[TMP15]]
; LV-NEXT: [[TMP18:%.*]] = icmp ugt i8* [[TMP17]], [[SCEVGEP5]]
; LV-NEXT: [[TMP19:%.*]] = icmp ult i8* [[TMP16]], [[SCEVGEP5]]
; LV-NEXT: [[TMP20:%.*]] = select i1 true, i1 [[TMP18]], i1 [[TMP19]]
; LV-NEXT: [[TMP21:%.*]] = or i1 [[TMP20]], [[MUL_OVERFLOW4]]
; LV-NEXT: [[TMP21:%.*]] = or i1 [[TMP18]], [[MUL_OVERFLOW4]]
; LV-NEXT: [[TMP22:%.*]] = or i1 [[TMP10]], [[TMP21]]
; LV-NEXT: br i1 [[TMP22]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH:%.*]]
; LV: for.body.ph.lver.orig:
@ -276,23 +264,17 @@ define void @f3(i16* noalias %a,
; LV-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL1]], 0
; LV-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL1]], 1
; LV-NEXT: [[TMP2:%.*]] = add i32 0, [[MUL_RESULT]]
; LV-NEXT: [[TMP3:%.*]] = sub i32 0, [[MUL_RESULT]]
; LV-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[TMP3]], 0
; LV-NEXT: [[TMP5:%.*]] = icmp slt i32 [[TMP2]], 0
; LV-NEXT: [[TMP6:%.*]] = select i1 false, i1 [[TMP4]], i1 [[TMP5]]
; LV-NEXT: [[TMP7:%.*]] = icmp ugt i64 [[TMP0]], 4294967295
; LV-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]]
; LV-NEXT: [[TMP8:%.*]] = or i1 [[TMP5]], [[TMP7]]
; LV-NEXT: [[TMP9:%.*]] = or i1 [[TMP8]], [[MUL_OVERFLOW]]
; LV-NEXT: [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[TMP0]])
; LV-NEXT: [[MUL_RESULT3:%.*]] = extractvalue { i64, i1 } [[MUL2]], 0
; LV-NEXT: [[MUL_OVERFLOW4:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1
; LV-NEXT: [[TMP11:%.*]] = sub i64 0, [[MUL_RESULT3]]
; LV-NEXT: [[TMP12:%.*]] = getelementptr i8, i8* [[A5]], i64 [[MUL_RESULT3]]
; LV-NEXT: [[TMP13:%.*]] = getelementptr i8, i8* [[A5]], i64 [[TMP11]]
; LV-NEXT: [[TMP14:%.*]] = icmp ugt i8* [[TMP13]], [[A5]]
; LV-NEXT: [[TMP15:%.*]] = icmp ult i8* [[TMP12]], [[A5]]
; LV-NEXT: [[TMP16:%.*]] = select i1 false, i1 [[TMP14]], i1 [[TMP15]]
; LV-NEXT: [[TMP17:%.*]] = or i1 [[TMP16]], [[MUL_OVERFLOW4]]
; LV-NEXT: [[TMP17:%.*]] = or i1 [[TMP15]], [[MUL_OVERFLOW4]]
; LV-NEXT: [[TMP18:%.*]] = or i1 [[TMP9]], [[TMP17]]
; LV-NEXT: br i1 [[TMP18]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH:%.*]]
; LV: for.body.ph.lver.orig:
@ -377,13 +359,10 @@ define void @f4(i16* noalias %a,
; LV-NEXT: [[MUL1:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 2, i32 [[TMP2]])
; LV-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL1]], 0
; LV-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL1]], 1
; LV-NEXT: [[TMP3:%.*]] = add i32 [[TMP1]], [[MUL_RESULT]]
; LV-NEXT: [[TMP4:%.*]] = sub i32 [[TMP1]], [[MUL_RESULT]]
; LV-NEXT: [[TMP5:%.*]] = icmp sgt i32 [[TMP4]], [[TMP1]]
; LV-NEXT: [[TMP6:%.*]] = icmp slt i32 [[TMP3]], [[TMP1]]
; LV-NEXT: [[TMP7:%.*]] = select i1 true, i1 [[TMP5]], i1 [[TMP6]]
; LV-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[TMP0]], 4294967295
; LV-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]]
; LV-NEXT: [[TMP9:%.*]] = or i1 [[TMP5]], [[TMP8]]
; LV-NEXT: [[TMP10:%.*]] = or i1 [[TMP9]], [[MUL_OVERFLOW]]
; LV-NEXT: [[TMP12:%.*]] = sext i32 [[TMP1]] to i64
; LV-NEXT: [[SCEVGEP:%.*]] = getelementptr i16, i16* [[A:%.*]], i64 [[TMP12]]
@ -392,12 +371,9 @@ define void @f4(i16* noalias %a,
; LV-NEXT: [[MUL_OVERFLOW4:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1
; LV-NEXT: [[SCEVGEP5:%.*]] = bitcast i16* [[SCEVGEP]] to i8*
; LV-NEXT: [[TMP13:%.*]] = sub i64 0, [[MUL_RESULT3]]
; LV-NEXT: [[TMP14:%.*]] = getelementptr i8, i8* [[SCEVGEP5]], i64 [[MUL_RESULT3]]
; LV-NEXT: [[TMP15:%.*]] = getelementptr i8, i8* [[SCEVGEP5]], i64 [[TMP13]]
; LV-NEXT: [[TMP16:%.*]] = icmp ugt i8* [[TMP15]], [[SCEVGEP5]]
; LV-NEXT: [[TMP17:%.*]] = icmp ult i8* [[TMP14]], [[SCEVGEP5]]
; LV-NEXT: [[TMP18:%.*]] = select i1 true, i1 [[TMP16]], i1 [[TMP17]]
; LV-NEXT: [[TMP19:%.*]] = or i1 [[TMP18]], [[MUL_OVERFLOW4]]
; LV-NEXT: [[TMP19:%.*]] = or i1 [[TMP16]], [[MUL_OVERFLOW4]]
; LV-NEXT: [[TMP20:%.*]] = or i1 [[TMP10]], [[TMP19]]
; LV-NEXT: br i1 [[TMP20]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH:%.*]]
; LV: for.body.ph.lver.orig:
@ -490,13 +466,10 @@ define void @f5(i16* noalias %a,
; LV-NEXT: [[MUL1:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 2, i32 [[TMP2]])
; LV-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL1]], 0
; LV-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL1]], 1
; LV-NEXT: [[TMP3:%.*]] = add i32 [[TMP1]], [[MUL_RESULT]]
; LV-NEXT: [[TMP4:%.*]] = sub i32 [[TMP1]], [[MUL_RESULT]]
; LV-NEXT: [[TMP5:%.*]] = icmp sgt i32 [[TMP4]], [[TMP1]]
; LV-NEXT: [[TMP6:%.*]] = icmp slt i32 [[TMP3]], [[TMP1]]
; LV-NEXT: [[TMP7:%.*]] = select i1 true, i1 [[TMP5]], i1 [[TMP6]]
; LV-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[TMP0]], 4294967295
; LV-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]]
; LV-NEXT: [[TMP9:%.*]] = or i1 [[TMP5]], [[TMP8]]
; LV-NEXT: [[TMP10:%.*]] = or i1 [[TMP9]], [[MUL_OVERFLOW]]
; LV-NEXT: [[TMP12:%.*]] = sext i32 [[TMP1]] to i64
; LV-NEXT: [[SCEVGEP:%.*]] = getelementptr i16, i16* [[A:%.*]], i64 [[TMP12]]
@ -505,12 +478,9 @@ define void @f5(i16* noalias %a,
; LV-NEXT: [[MUL_OVERFLOW4:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1
; LV-NEXT: [[SCEVGEP5:%.*]] = bitcast i16* [[SCEVGEP]] to i8*
; LV-NEXT: [[TMP13:%.*]] = sub i64 0, [[MUL_RESULT3]]
; LV-NEXT: [[TMP14:%.*]] = getelementptr i8, i8* [[SCEVGEP5]], i64 [[MUL_RESULT3]]
; LV-NEXT: [[TMP15:%.*]] = getelementptr i8, i8* [[SCEVGEP5]], i64 [[TMP13]]
; LV-NEXT: [[TMP16:%.*]] = icmp ugt i8* [[TMP15]], [[SCEVGEP5]]
; LV-NEXT: [[TMP17:%.*]] = icmp ult i8* [[TMP14]], [[SCEVGEP5]]
; LV-NEXT: [[TMP18:%.*]] = select i1 true, i1 [[TMP16]], i1 [[TMP17]]
; LV-NEXT: [[TMP19:%.*]] = or i1 [[TMP18]], [[MUL_OVERFLOW4]]
; LV-NEXT: [[TMP19:%.*]] = or i1 [[TMP16]], [[MUL_OVERFLOW4]]
; LV-NEXT: [[TMP20:%.*]] = or i1 [[TMP10]], [[TMP19]]
; LV-NEXT: br i1 [[TMP20]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH:%.*]]
; LV: for.body.ph.lver.orig: