[IRBuilder] Fold consistently for or/and whether constant is LHS or RHS

Without this, we have the unfortunate property that tests are dependent on the order of operands passed to the CreateOr and CreateAnd functions.  In actual usage, we'd promptly optimize them away, but it made tests slightly more verbose than they should have been.

llvm-svn: 365260
This commit is contained in:
Philip Reames 2019-07-06 04:28:00 +00:00
parent 9e62c86408
commit 9812668d77
10 changed files with 70 additions and 88 deletions

View File

@ -1197,6 +1197,9 @@ public:
}
Value *CreateAnd(Value *LHS, Value *RHS, const Twine &Name = "") {
if (auto *LC = dyn_cast<ConstantInt>(LHS))
if (LC->isMinusOne())
return RHS; // -1 & RHS = RHS
if (auto *RC = dyn_cast<Constant>(RHS)) {
if (isa<ConstantInt>(RC) && cast<ConstantInt>(RC)->isMinusOne())
return LHS; // LHS & -1 -> LHS
@ -1223,6 +1226,9 @@ public:
}
Value *CreateOr(Value *LHS, Value *RHS, const Twine &Name = "") {
if (auto *LC = dyn_cast<Constant>(LHS))
if (LC->isNullValue())
return RHS; // 0 | RHS -> RHS
if (auto *RC = dyn_cast<Constant>(RHS)) {
if (RC->isNullValue())
return LHS; // LHS | 0 -> LHS

View File

@ -53,8 +53,6 @@ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
; LV-NEXT: [[CheckOr0:%[^ ]*]] = or i1 [[Cmp]], [[BECheck]]
; LV-NEXT: [[PredCheck0:%[^ ]*]] = or i1 [[CheckOr0]], [[OFMulOverflow]]
; LV-NEXT: [[Or0:%[^ ]*]] = or i1 false, [[PredCheck0]]
; LV-NEXT: [[OFMul1:%[^ ]*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[BE]])
; LV-NEXT: [[OFMulResult1:%[^ ]*]] = extractvalue { i64, i1 } [[OFMul1]], 0
; LV-NEXT: [[OFMulOverflow1:%[^ ]*]] = extractvalue { i64, i1 } [[OFMul1]], 1
@ -65,7 +63,7 @@ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
; LV-NEXT: [[Cmp:%[^ ]*]] = select i1 false, i1 [[CmpNeg1]], i1 [[CmpPos1]]
; LV-NEXT: [[PredCheck1:%[^ ]*]] = or i1 [[Cmp]], [[OFMulOverflow1]]
; LV: [[FinalCheck:%[^ ]*]] = or i1 [[Or0]], [[PredCheck1]]
; LV: [[FinalCheck:%[^ ]*]] = or i1 [[PredCheck0]], [[PredCheck1]]
; LV: br i1 [[FinalCheck]], label %for.body.ph.lver.orig, label %for.body.ph
define void @f1(i16* noalias %a,
i16* noalias %b, i64 %N) {
@ -147,9 +145,6 @@ for.end: ; preds = %for.body
; LV-NEXT: [[BECheck:%[^ ]*]] = icmp ugt i64 [[BE]], 4294967295
; LV-NEXT: [[CheckOr0:%[^ ]*]] = or i1 [[Cmp]], [[BECheck]]
; LV-NEXT: [[PredCheck0:%[^ ]*]] = or i1 [[CheckOr0]], [[OFMulOverflow]]
; LV-NEXT: [[Or0:%[^ ]*]] = or i1 false, [[PredCheck0]]
; LV: [[OFMul1:%[^ ]*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[BE]])
; LV-NEXT: [[OFMulResult1:%[^ ]*]] = extractvalue { i64, i1 } [[OFMul1]], 0
; LV-NEXT: [[OFMulOverflow1:%[^ ]*]] = extractvalue { i64, i1 } [[OFMul1]], 1
@ -160,7 +155,7 @@ for.end: ; preds = %for.body
; LV-NEXT: [[Cmp:%[^ ]*]] = select i1 true, i1 [[CmpNeg1]], i1 [[CmpPos1]]
; LV-NEXT: [[PredCheck1:%[^ ]*]] = or i1 [[Cmp]], [[OFMulOverflow1]]
; LV: [[FinalCheck:%[^ ]*]] = or i1 [[Or0]], [[PredCheck1]]
; LV: [[FinalCheck:%[^ ]*]] = or i1 [[PredCheck0]], [[PredCheck1]]
; LV: br i1 [[FinalCheck]], label %for.body.ph.lver.orig, label %for.body.ph
define void @f2(i16* noalias %a,
i16* noalias %b, i64 %N) {
@ -227,9 +222,6 @@ for.end: ; preds = %for.body
; LV-NEXT: [[BECheck:%[^ ]*]] = icmp ugt i64 [[BE]], 4294967295
; LV-NEXT: [[CheckOr0:%[^ ]*]] = or i1 [[Cmp]], [[BECheck]]
; LV-NEXT: [[PredCheck0:%[^ ]*]] = or i1 [[CheckOr0]], [[OFMulOverflow]]
; LV-NEXT: [[Or0:%[^ ]*]] = or i1 false, [[PredCheck0]]
; LV: [[OFMul1:%[^ ]*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[BE:%[^ ]*]])
; LV-NEXT: [[OFMulResult1:%[^ ]*]] = extractvalue { i64, i1 } [[OFMul1]], 0
; LV-NEXT: [[OFMulOverflow1:%[^ ]*]] = extractvalue { i64, i1 } [[OFMul1]], 1
@ -240,7 +232,7 @@ for.end: ; preds = %for.body
; LV-NEXT: [[Cmp:%[^ ]*]] = select i1 false, i1 [[CmpNeg1]], i1 [[CmpPos1]]
; LV-NEXT: [[PredCheck1:%[^ ]*]] = or i1 [[Cmp]], [[OFMulOverflow1]]
; LV: [[FinalCheck:%[^ ]*]] = or i1 [[Or0]], [[PredCheck1]]
; LV: [[FinalCheck:%[^ ]*]] = or i1 [[PredCheck0]], [[PredCheck1]]
; LV: br i1 [[FinalCheck]], label %for.body.ph.lver.orig, label %for.body.ph
define void @f3(i16* noalias %a,
i16* noalias %b, i64 %N) {
@ -303,9 +295,6 @@ for.end: ; preds = %for.body
; LV-NEXT: [[BECheck:%[^ ]*]] = icmp ugt i64 [[BE]], 4294967295
; LV-NEXT: [[CheckOr0:%[^ ]*]] = or i1 [[Cmp]], [[BECheck]]
; LV-NEXT: [[PredCheck0:%[^ ]*]] = or i1 [[CheckOr0]], [[OFMulOverflow]]
; LV-NEXT: [[Or0:%[^ ]*]] = or i1 false, [[PredCheck0]]
; LV: [[OFMul1:%[^ ]*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[BE:%[^ ]*]])
; LV-NEXT: [[OFMulResult1:%[^ ]*]] = extractvalue { i64, i1 } [[OFMul1]], 0
; LV-NEXT: [[OFMulOverflow1:%[^ ]*]] = extractvalue { i64, i1 } [[OFMul1]], 1
@ -316,7 +305,7 @@ for.end: ; preds = %for.body
; LV-NEXT: [[Cmp:%[^ ]*]] = select i1 true, i1 [[CmpNeg1]], i1 [[CmpPos1]]
; LV-NEXT: [[PredCheck1:%[^ ]*]] = or i1 [[Cmp]], [[OFMulOverflow1]]
; LV: [[FinalCheck:%[^ ]*]] = or i1 [[Or0]], [[PredCheck1]]
; LV: [[FinalCheck:%[^ ]*]] = or i1 [[PredCheck0]], [[PredCheck1]]
; LV: br i1 [[FinalCheck]], label %for.body.ph.lver.orig, label %for.body.ph
define void @f4(i16* noalias %a,
i16* noalias %b, i64 %N) {
@ -381,9 +370,6 @@ for.end: ; preds = %for.body
; LV-NEXT: [[BECheck:%[^ ]*]] = icmp ugt i64 [[BE]], 4294967295
; LV-NEXT: [[CheckOr0:%[^ ]*]] = or i1 [[Cmp]], [[BECheck]]
; LV-NEXT: [[PredCheck0:%[^ ]*]] = or i1 [[CheckOr0]], [[OFMulOverflow]]
; LV-NEXT: [[Or0:%[^ ]*]] = or i1 false, [[PredCheck0]]
; LV: [[OFMul1:%[^ ]*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[BE:%[^ ]*]])
; LV-NEXT: [[OFMulResult1:%[^ ]*]] = extractvalue { i64, i1 } [[OFMul1]], 0
; LV-NEXT: [[OFMulOverflow1:%[^ ]*]] = extractvalue { i64, i1 } [[OFMul1]], 1
@ -394,7 +380,7 @@ for.end: ; preds = %for.body
; LV-NEXT: [[Cmp:%[^ ]*]] = select i1 true, i1 [[CmpNeg1]], i1 [[CmpPos1]]
; LV-NEXT: [[PredCheck1:%[^ ]*]] = or i1 [[Cmp]], [[OFMulOverflow1]]
; LV: [[FinalCheck:%[^ ]*]] = or i1 [[Or0]], [[PredCheck1]]
; LV: [[FinalCheck:%[^ ]*]] = or i1 [[PredCheck0]], [[PredCheck1]]
; LV: br i1 [[FinalCheck]], label %for.body.ph.lver.orig, label %for.body.ph
define void @f5(i16* noalias %a,
i16* noalias %b, i64 %N) {

View File

@ -50,7 +50,6 @@ for.body.i: ; preds = %for.body.i, %entry
; CHECK: mul i64 {{.*}}, 4
; CHECK: sub i64 4000, %
; CHECK-NEXT: icmp ult i64 {{.*}}, 4
; CHECK-NEXT: or i1
; CHECK: trap
%1 = load i32, i32* %arrayidx.i, align 4
%add.i = add nsw i32 %1, %sum.01.i
@ -243,7 +242,6 @@ for.body4: ; preds = %for.body4, %for.con
; CHECK: add i64
; CHECK: sub i64 16, %
; CHECK-NEXT: icmp ult i64 {{.*}}, 4
; CHECK-NEXT: or i1
; CHECK: trap
%1 = load i32, i32* %arrayidx7, align 4
%add = add nsw i32 %1, %sum.119

View File

@ -25,18 +25,17 @@ define void @f(i32* noalias %a, i32* noalias %b, i32* noalias %c, i32* noalias %
; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt i64 [[TMP0]], 4294967295
; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]]
; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP8]], [[MUL_OVERFLOW]]
; CHECK-NEXT: [[TMP10:%.*]] = or i1 false, [[TMP9]]
; CHECK-NEXT: [[MUL3:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 8, i64 [[TMP0]])
; CHECK-NEXT: [[MUL_RESULT4:%.*]] = extractvalue { i64, i1 } [[MUL3]], 0
; CHECK-NEXT: [[MUL_OVERFLOW5:%.*]] = extractvalue { i64, i1 } [[MUL3]], 1
; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[A2]], [[MUL_RESULT4]]
; CHECK-NEXT: [[TMP12:%.*]] = sub i64 [[A2]], [[MUL_RESULT4]]
; CHECK-NEXT: [[TMP13:%.*]] = icmp ugt i64 [[TMP12]], [[A2]]
; CHECK-NEXT: [[TMP14:%.*]] = icmp ult i64 [[TMP11]], [[A2]]
; CHECK-NEXT: [[TMP15:%.*]] = select i1 false, i1 [[TMP13]], i1 [[TMP14]]
; CHECK-NEXT: [[TMP16:%.*]] = or i1 [[TMP15]], [[MUL_OVERFLOW5]]
; CHECK-NEXT: [[TMP17:%.*]] = or i1 [[TMP10]], [[TMP16]]
; CHECK-NEXT: br i1 [[TMP17]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH_LDIST1:%.*]]
; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[A2]], [[MUL_RESULT4]]
; CHECK-NEXT: [[TMP11:%.*]] = sub i64 [[A2]], [[MUL_RESULT4]]
; CHECK-NEXT: [[TMP12:%.*]] = icmp ugt i64 [[TMP11]], [[A2]]
; CHECK-NEXT: [[TMP13:%.*]] = icmp ult i64 [[TMP10]], [[A2]]
; CHECK-NEXT: [[TMP14:%.*]] = select i1 false, i1 [[TMP12]], i1 [[TMP13]]
; CHECK-NEXT: [[TMP15:%.*]] = or i1 [[TMP14]], [[MUL_OVERFLOW5]]
; CHECK-NEXT: [[TMP16:%.*]] = or i1 [[TMP9]], [[TMP15]]
; CHECK-NEXT: br i1 [[TMP16]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH_LDIST1:%.*]]
; CHECK: for.body.ph.lver.orig:
; CHECK-NEXT: br label [[FOR_BODY_LVER_ORIG:%.*]]
; CHECK: for.body.lver.orig:
@ -70,14 +69,14 @@ define void @f(i32* noalias %a, i32* noalias %b, i32* noalias %c, i32* noalias %
; CHECK-NEXT: [[MUL_LDIST1:%.*]] = mul i32 [[IND1_LDIST1]], 2
; CHECK-NEXT: [[MUL_EXT_LDIST1:%.*]] = zext i32 [[MUL_LDIST1]] to i64
; CHECK-NEXT: [[ARRAYIDXA_LDIST1:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[MUL_EXT_LDIST1]]
; CHECK-NEXT: [[LOADA_LDIST1:%.*]] = load i32, i32* [[ARRAYIDXA_LDIST1]], align 4
; CHECK-NEXT: [[LOADA_LDIST1:%.*]] = load i32, i32* [[ARRAYIDXA_LDIST1]], align 4, !alias.scope !0
; CHECK-NEXT: [[ARRAYIDXB_LDIST1:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[MUL_EXT_LDIST1]]
; CHECK-NEXT: [[LOADB_LDIST1:%.*]] = load i32, i32* [[ARRAYIDXB_LDIST1]], align 4
; CHECK-NEXT: [[LOADB_LDIST1:%.*]] = load i32, i32* [[ARRAYIDXB_LDIST1]], align 4, !alias.scope !3
; CHECK-NEXT: [[MULA_LDIST1:%.*]] = mul i32 [[LOADB_LDIST1]], [[LOADA_LDIST1]]
; CHECK-NEXT: [[ADD_LDIST1]] = add nuw nsw i64 [[IND_LDIST1]], 1
; CHECK-NEXT: [[INC1_LDIST1]] = add i32 [[IND1_LDIST1]], 1
; CHECK-NEXT: [[ARRAYIDXA_PLUS_4_LDIST1:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[ADD_LDIST1]]
; CHECK-NEXT: store i32 [[MULA_LDIST1]], i32* [[ARRAYIDXA_PLUS_4_LDIST1]], align 4
; CHECK-NEXT: store i32 [[MULA_LDIST1]], i32* [[ARRAYIDXA_PLUS_4_LDIST1]], align 4, !alias.scope !5
; CHECK-NEXT: [[EXITCOND_LDIST1:%.*]] = icmp eq i64 [[ADD_LDIST1]], [[N]]
; CHECK-NEXT: br i1 [[EXITCOND_LDIST1]], label [[FOR_BODY_PH:%.*]], label [[FOR_BODY_LDIST1]]
; CHECK: for.body.ph:
@ -90,12 +89,12 @@ define void @f(i32* noalias %a, i32* noalias %b, i32* noalias %c, i32* noalias %
; CHECK-NEXT: [[ADD]] = add nuw nsw i64 [[IND]], 1
; CHECK-NEXT: [[INC1]] = add i32 [[IND1]], 1
; CHECK-NEXT: [[ARRAYIDXD:%.*]] = getelementptr inbounds i32, i32* [[D]], i64 [[MUL_EXT]]
; CHECK-NEXT: [[LOADD:%.*]] = load i32, i32* [[ARRAYIDXD]], align 4
; CHECK-NEXT: [[LOADD:%.*]] = load i32, i32* [[ARRAYIDXD]], align 4, !alias.scope !7
; CHECK-NEXT: [[ARRAYIDXE:%.*]] = getelementptr inbounds i32, i32* [[E]], i64 [[MUL_EXT]]
; CHECK-NEXT: [[LOADE:%.*]] = load i32, i32* [[ARRAYIDXE]], align 4
; CHECK-NEXT: [[LOADE:%.*]] = load i32, i32* [[ARRAYIDXE]], align 4, !alias.scope !9
; CHECK-NEXT: [[MULC:%.*]] = mul i32 [[LOADD]], [[LOADE]]
; CHECK-NEXT: [[ARRAYIDXC:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 [[MUL_EXT]]
; CHECK-NEXT: store i32 [[MULC]], i32* [[ARRAYIDXC]], align 4
; CHECK-NEXT: store i32 [[MULC]], i32* [[ARRAYIDXC]], align 4, !alias.scope !11
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[ADD]], [[N]]
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]]
; CHECK: for.end:

View File

@ -232,12 +232,11 @@ define i32 @signed_loop_0_to_n_ult_check_length_range_known(i32* %array, i32* %l
; CHECK-NEXT: br i1 [[TMP5]], label [[EXIT:%.*]], label [[LOOP_PREHEADER:%.*]]
; CHECK: loop.preheader:
; CHECK-NEXT: [[TMP0:%.*]] = icmp sle i32 [[N]], [[LENGTH]]
; CHECK-NEXT: [[TMP1:%.*]] = and i1 true, [[TMP0]]
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[LOOP]] ], [ 0, [[LOOP_PREHEADER]] ]
; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[LOOP]] ], [ 0, [[LOOP_PREHEADER]] ]
; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[TMP1]], i32 9) [ "deopt"() ]
; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[TMP0]], i32 9) [ "deopt"() ]
; CHECK-NEXT: [[I_I64:%.*]] = zext i32 [[I]] to i64
; CHECK-NEXT: [[ARRAY_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY:%.*]], i64 [[I_I64]]
; CHECK-NEXT: [[ARRAY_I:%.*]] = load i32, i32* [[ARRAY_I_PTR]], align 4

View File

@ -282,14 +282,13 @@ define i32 @signed_loop_0_to_n_ult_check_length_range_known(i32* %array, i32* %l
; CHECK-NEXT: br i1 [[TMP5]], label [[EXIT:%.*]], label [[LOOP_PREHEADER:%.*]]
; CHECK: loop.preheader:
; CHECK-NEXT: [[TMP0:%.*]] = icmp sle i32 [[N]], [[LENGTH]]
; CHECK-NEXT: [[TMP1:%.*]] = and i1 true, [[TMP0]]
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED:%.*]] ], [ 0, [[LOOP_PREHEADER]] ]
; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ]
; CHECK-NEXT: [[WIDENABLE_COND:%.*]] = call i1 @llvm.experimental.widenable.condition()
; CHECK-NEXT: [[TMP2:%.*]] = and i1 [[TMP1]], [[WIDENABLE_COND]]
; CHECK-NEXT: br i1 [[TMP2]], label [[GUARDED]], label [[DEOPT:%.*]], !prof !0
; CHECK-NEXT: [[TMP1:%.*]] = and i1 [[TMP0]], [[WIDENABLE_COND]]
; CHECK-NEXT: br i1 [[TMP1]], label [[GUARDED]], label [[DEOPT:%.*]], !prof !0
; CHECK: deopt:
; CHECK-NEXT: [[DEOPTCALL:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32(i32 9) [ "deopt"() ]
; CHECK-NEXT: ret i32 [[DEOPTCALL]]

View File

@ -385,14 +385,13 @@ define i32 @constant_length(i32* %array, i32 %n) {
; CHECK-NEXT: br i1 [[TMP5]], label [[EXIT:%.*]], label [[LOOP_PREHEADER:%.*]]
; CHECK: loop.preheader:
; CHECK-NEXT: [[TMP0:%.*]] = icmp ule i32 [[N]], 20
; CHECK-NEXT: [[TMP1:%.*]] = and i1 true, [[TMP0]]
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[LOOP]] ], [ 0, [[LOOP_PREHEADER]] ]
; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[LOOP]] ], [ 0, [[LOOP_PREHEADER]] ]
; CHECK-NEXT: [[UNKNOWN:%.*]] = load volatile i1, i1* @UNKNOWN
; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[UNKNOWN]]) [ "deopt"() ]
; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[TMP1]], i32 9) [ "deopt"() ]
; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[TMP0]], i32 9) [ "deopt"() ]
; CHECK-NEXT: [[I_I64:%.*]] = zext i32 [[I]] to i64
; CHECK-NEXT: [[ARRAY_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY:%.*]], i64 [[I_I64]]
; CHECK-NEXT: [[ARRAY_I:%.*]] = load i32, i32* [[ARRAY_I_PTR]], align 4

View File

@ -71,34 +71,33 @@ define void @foo(i32* nocapture %a, i32* nocapture %b, i32 %k, i32 %m) #0 {
; CHECK-NEXT: [[TMP13:%.*]] = icmp slt i32 [[TMP10]], [[TMP8]]
; CHECK-NEXT: [[TMP14:%.*]] = select i1 false, i1 [[TMP12]], i1 [[TMP13]]
; CHECK-NEXT: [[TMP15:%.*]] = or i1 [[TMP14]], [[MUL_OVERFLOW]]
; CHECK-NEXT: [[TMP16:%.*]] = or i1 false, [[TMP15]]
; CHECK-NEXT: br i1 [[TMP16]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; CHECK-NEXT: br i1 [[TMP15]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 4
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP17:%.*]] = trunc i64 [[INDEX]] to i32
; CHECK-NEXT: [[TMP18:%.*]] = add i32 [[TMP17]], 0
; CHECK-NEXT: [[TMP19:%.*]] = add i32 [[ADD_US]], [[TMP18]]
; CHECK-NEXT: [[TMP20:%.*]] = sext i32 [[TMP19]] to i64
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP20]]
; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, i32* [[TMP21]], i32 0
; CHECK-NEXT: [[TMP23:%.*]] = bitcast i32* [[TMP22]] to <4 x i32>*
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP23]], align 4
; CHECK-NEXT: [[TMP24:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], <i32 1, i32 1, i32 1, i32 1>
; CHECK-NEXT: [[TMP25:%.*]] = extractelement <4 x i32> [[TMP24]], i32 0
; CHECK-NEXT: [[TMP16:%.*]] = trunc i64 [[INDEX]] to i32
; CHECK-NEXT: [[TMP17:%.*]] = add i32 [[TMP16]], 0
; CHECK-NEXT: [[TMP18:%.*]] = add i32 [[ADD_US]], [[TMP17]]
; CHECK-NEXT: [[TMP19:%.*]] = sext i32 [[TMP18]] to i64
; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP19]]
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, i32* [[TMP20]], i32 0
; CHECK-NEXT: [[TMP22:%.*]] = bitcast i32* [[TMP21]] to <4 x i32>*
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP22]], align 4
; CHECK-NEXT: [[TMP23:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], <i32 1, i32 1, i32 1, i32 1>
; CHECK-NEXT: [[TMP24:%.*]] = extractelement <4 x i32> [[TMP23]], i32 0
; CHECK-NEXT: store i32 [[TMP24]], i32* [[ARRAYIDX7_US]], align 4, !llvm.mem.parallel_loop_access !0
; CHECK-NEXT: [[TMP25:%.*]] = extractelement <4 x i32> [[TMP23]], i32 1
; CHECK-NEXT: store i32 [[TMP25]], i32* [[ARRAYIDX7_US]], align 4, !llvm.mem.parallel_loop_access !0
; CHECK-NEXT: [[TMP26:%.*]] = extractelement <4 x i32> [[TMP24]], i32 1
; CHECK-NEXT: [[TMP26:%.*]] = extractelement <4 x i32> [[TMP23]], i32 2
; CHECK-NEXT: store i32 [[TMP26]], i32* [[ARRAYIDX7_US]], align 4, !llvm.mem.parallel_loop_access !0
; CHECK-NEXT: [[TMP27:%.*]] = extractelement <4 x i32> [[TMP24]], i32 2
; CHECK-NEXT: [[TMP27:%.*]] = extractelement <4 x i32> [[TMP23]], i32 3
; CHECK-NEXT: store i32 [[TMP27]], i32* [[ARRAYIDX7_US]], align 4, !llvm.mem.parallel_loop_access !0
; CHECK-NEXT: [[TMP28:%.*]] = extractelement <4 x i32> [[TMP24]], i32 3
; CHECK-NEXT: store i32 [[TMP28]], i32* [[ARRAYIDX7_US]], align 4, !llvm.mem.parallel_loop_access !0
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP29:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP29]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !5
; CHECK-NEXT: [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP28]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !5
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_US]], label [[SCALAR_PH]]

View File

@ -40,16 +40,16 @@ define i32 @main() local_unnamed_addr #0 {
; CHECK-NEXT: [[TMP4:%.*]] = zext i8 [[TMP3]] to i32
; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[TMP4]], 1
; CHECK-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP2]], [[TMP4]]
; CHECK-NEXT: [[UMAX:%.*]] = select i1 [[TMP6]], i32 [[TMP2]], i32 [[TMP4]]
; CHECK-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[UMAX]]
; CHECK-NEXT: [[UMIN:%.*]] = select i1 [[TMP6]], i32 [[TMP2]], i32 [[TMP4]]
; CHECK-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[UMIN]]
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP7]], 8
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
; CHECK: vector.scevcheck:
; CHECK-NEXT: [[TMP8:%.*]] = add i8 [[CONV3]], -1
; CHECK-NEXT: [[TMP9:%.*]] = zext i8 [[TMP8]] to i32
; CHECK-NEXT: [[TMP10:%.*]] = icmp ult i32 [[TMP2]], [[TMP9]]
; CHECK-NEXT: [[UMAX1:%.*]] = select i1 [[TMP10]], i32 [[TMP2]], i32 [[TMP9]]
; CHECK-NEXT: [[TMP11:%.*]] = sub i32 [[TMP9]], [[UMAX1]]
; CHECK-NEXT: [[UMIN1:%.*]] = select i1 [[TMP10]], i32 [[TMP2]], i32 [[TMP9]]
; CHECK-NEXT: [[TMP11:%.*]] = sub i32 [[TMP9]], [[UMIN1]]
; CHECK-NEXT: [[TMP12:%.*]] = trunc i32 [[TMP11]] to i8
; CHECK-NEXT: [[MUL:%.*]] = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 1, i8 [[TMP12]])
; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i8, i1 } [[MUL]], 0
@ -62,48 +62,47 @@ define i32 @main() local_unnamed_addr #0 {
; CHECK-NEXT: [[TMP18:%.*]] = icmp ugt i32 [[TMP11]], 255
; CHECK-NEXT: [[TMP19:%.*]] = or i1 [[TMP17]], [[TMP18]]
; CHECK-NEXT: [[TMP20:%.*]] = or i1 [[TMP19]], [[MUL_OVERFLOW]]
; CHECK-NEXT: [[TMP21:%.*]] = or i1 false, [[TMP20]]
; CHECK-NEXT: br i1 [[TMP21]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; CHECK-NEXT: br i1 [[TMP20]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP7]], 8
; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP7]], [[N_MOD_VF]]
; CHECK-NEXT: [[CAST_CRD:%.*]] = trunc i32 [[N_VEC]] to i8
; CHECK-NEXT: [[IND_END:%.*]] = sub i8 [[CONV3]], [[CAST_CRD]]
; CHECK-NEXT: [[TMP22:%.*]] = insertelement <4 x i32> zeroinitializer, i32 [[DOTPROMOTED]], i32 0
; CHECK-NEXT: [[TMP21:%.*]] = insertelement <4 x i32> zeroinitializer, i32 [[DOTPROMOTED]], i32 0
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ [[TMP22]], [[VECTOR_PH]] ], [ [[TMP26:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP27:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP23:%.*]] = trunc i32 [[INDEX]] to i8
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i8 [[CONV3]], [[TMP23]]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ [[TMP21]], [[VECTOR_PH]] ], [ [[TMP25:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP26:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP22:%.*]] = trunc i32 [[INDEX]] to i8
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i8 [[CONV3]], [[TMP22]]
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i8> undef, i8 [[OFFSET_IDX]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT]], <4 x i8> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i8> [[BROADCAST_SPLAT]], <i8 0, i8 -1, i8 -2, i8 -3>
; CHECK-NEXT: [[INDUCTION3:%.*]] = add <4 x i8> [[BROADCAST_SPLAT]], <i8 -4, i8 -5, i8 -6, i8 -7>
; CHECK-NEXT: [[TMP24:%.*]] = add i8 [[OFFSET_IDX]], 0
; CHECK-NEXT: [[TMP25:%.*]] = add i8 [[OFFSET_IDX]], -4
; CHECK-NEXT: [[TMP26]] = add <4 x i32> [[VEC_PHI]], <i32 1, i32 1, i32 1, i32 1>
; CHECK-NEXT: [[TMP27]] = add <4 x i32> [[VEC_PHI2]], <i32 1, i32 1, i32 1, i32 1>
; CHECK-NEXT: [[TMP23:%.*]] = add i8 [[OFFSET_IDX]], 0
; CHECK-NEXT: [[TMP24:%.*]] = add i8 [[OFFSET_IDX]], -4
; CHECK-NEXT: [[TMP25]] = add <4 x i32> [[VEC_PHI]], <i32 1, i32 1, i32 1, i32 1>
; CHECK-NEXT: [[TMP26]] = add <4 x i32> [[VEC_PHI2]], <i32 1, i32 1, i32 1, i32 1>
; CHECK-NEXT: [[TMP27:%.*]] = add i8 [[TMP23]], -1
; CHECK-NEXT: [[TMP28:%.*]] = add i8 [[TMP24]], -1
; CHECK-NEXT: [[TMP29:%.*]] = add i8 [[TMP25]], -1
; CHECK-NEXT: [[TMP29:%.*]] = zext i8 [[TMP27]] to i32
; CHECK-NEXT: [[TMP30:%.*]] = zext i8 [[TMP28]] to i32
; CHECK-NEXT: [[TMP31:%.*]] = zext i8 [[TMP29]] to i32
; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8
; CHECK-NEXT: [[TMP32:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0
; CHECK-NEXT: [[TMP31:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP31]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0
; CHECK: middle.block:
; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP27]], [[TMP26]]
; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP26]], [[TMP25]]
; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[BIN_RDX]], <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
; CHECK-NEXT: [[BIN_RDX4:%.*]] = add <4 x i32> [[BIN_RDX]], [[RDX_SHUF]]
; CHECK-NEXT: [[RDX_SHUF5:%.*]] = shufflevector <4 x i32> [[BIN_RDX4]], <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[BIN_RDX6:%.*]] = add <4 x i32> [[BIN_RDX4]], [[RDX_SHUF5]]
; CHECK-NEXT: [[TMP33:%.*]] = extractelement <4 x i32> [[BIN_RDX6]], i32 0
; CHECK-NEXT: [[TMP32:%.*]] = extractelement <4 x i32> [[BIN_RDX6]], i32 0
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP7]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND4_FOR_INC9_CRIT_EDGE:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[CONV3]], [[FOR_BODY8_LR_PH]] ], [ [[CONV3]], [[VECTOR_SCEVCHECK]] ]
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[DOTPROMOTED]], [[FOR_BODY8_LR_PH]] ], [ [[DOTPROMOTED]], [[VECTOR_SCEVCHECK]] ], [ [[TMP33]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[DOTPROMOTED]], [[FOR_BODY8_LR_PH]] ], [ [[DOTPROMOTED]], [[VECTOR_SCEVCHECK]] ], [ [[TMP32]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: br label [[FOR_BODY8:%.*]]
; CHECK: for.body8:
; CHECK-NEXT: [[INC5:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY8]] ]
@ -114,7 +113,7 @@ define i32 @main() local_unnamed_addr #0 {
; CHECK-NEXT: [[CMP6:%.*]] = icmp ult i32 [[TMP2]], [[CONV5]]
; CHECK-NEXT: br i1 [[CMP6]], label [[FOR_BODY8]], label [[FOR_COND4_FOR_INC9_CRIT_EDGE]], !llvm.loop !2
; CHECK: for.cond4.for.inc9_crit_edge:
; CHECK-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[FOR_BODY8]] ], [ [[TMP33]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[FOR_BODY8]] ], [ [[TMP32]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: store i32 [[INC_LCSSA]], i32* getelementptr inbounds ([192 x [192 x i32]], [192 x [192 x i32]]* @a, i64 0, i64 0, i64 0), align 16
; CHECK-NEXT: br label [[FOR_INC9]]
; CHECK: for.inc9:

View File

@ -33,9 +33,8 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
; CHECK: %mul = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 {{.*}}, i8 {{.*}})
; CHECK-NOT: %mul = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 {{.*}}, i8 {{.*}})
; CHECK: %[[TEST:[0-9]+]] = or i1 {{.*}}, %mul.overflow
; CHECK: %[[NTEST:[0-9]+]] = or i1 false, %[[TEST]]
; CHECK: %ident.check = icmp ne i32 {{.*}}, %{{.*}}
; CHECK: %{{.*}} = or i1 %[[NTEST]], %ident.check
; CHECK: %{{.*}} = or i1 %[[TEST]], %ident.check
; CHECK-NOT: %mul = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 {{.*}}, i8 {{.*}})
; CHECK: vector.body:
; CHECK: <4 x i32>
@ -92,10 +91,9 @@ for.end:
; CHECK: %mul = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 {{.*}}, i8 {{.*}})
; CHECK-NOT: %mul = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 {{.*}}, i8 {{.*}})
; CHECK: %[[TEST:[0-9]+]] = or i1 {{.*}}, %mul.overflow
; CHECK: %[[NTEST:[0-9]+]] = or i1 false, %[[TEST]]
; CHECK: %[[EXT:[0-9]+]] = sext i8 {{.*}} to i32
; CHECK: %ident.check = icmp ne i32 {{.*}}, %[[EXT]]
; CHECK: %{{.*}} = or i1 %[[NTEST]], %ident.check
; CHECK: %{{.*}} = or i1 %[[TEST]], %ident.check
; CHECK-NOT: %mul = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 {{.*}}, i8 {{.*}})
; CHECK: vector.body:
; CHECK: <4 x i32>