forked from OSchip/llvm-project
[InstCombine] Fold ult(add(x,-1),c) -> ule(x,c) iff x != 0 (PR57635)
Alive2: https://alive2.llvm.org/ce/z/sZ6wwS. As detailed in Issues #57635 and #37628, for unsigned comparisons we can compare prior to a decrement iff the value is known never to be zero. Differential Revision: https://reviews.llvm.org/D134172
This commit is contained in:
parent
8aed4bb278
commit
09cb9fdef9
|
@ -2904,6 +2904,13 @@ Instruction *InstCombinerImpl::foldICmpAddConstant(ICmpInst &Cmp,
|
|||
if (Pred == CmpInst::ICMP_SLT && C == *C2)
|
||||
return new ICmpInst(ICmpInst::ICMP_UGT, X, ConstantInt::get(Ty, C ^ SMax));
|
||||
|
||||
// (X + -1) <u C --> X <=u C (if X is known to be non-zero)
|
||||
if (Pred == CmpInst::ICMP_ULT && C2->isAllOnes()) {
|
||||
const SimplifyQuery Q = SQ.getWithInstruction(&Cmp);
|
||||
if (llvm::isKnownNonZero(X, DL, 0, Q.AC, Q.CxtI, Q.DT))
|
||||
return new ICmpInst(ICmpInst::ICMP_ULE, X, ConstantInt::get(Ty, C));
|
||||
}
|
||||
|
||||
if (!Add->hasOneUse())
|
||||
return nullptr;
|
||||
|
||||
|
|
|
@ -1216,8 +1216,7 @@ define i1 @icmp_dec_assume_nonzero(i8 %x) {
|
|||
; CHECK-LABEL: @icmp_dec_assume_nonzero(
|
||||
; CHECK-NEXT: [[Z:%.*]] = icmp ne i8 [[X:%.*]], 0
|
||||
; CHECK-NEXT: call void @llvm.assume(i1 [[Z]])
|
||||
; CHECK-NEXT: [[I:%.*]] = add i8 [[X]], -1
|
||||
; CHECK-NEXT: [[C:%.*]] = icmp ult i8 [[I]], 7
|
||||
; CHECK-NEXT: [[C:%.*]] = icmp ult i8 [[X]], 8
|
||||
; CHECK-NEXT: ret i1 [[C]]
|
||||
;
|
||||
%z = icmp ne i8 %x, 0
|
||||
|
@ -1231,8 +1230,7 @@ define i1 @icmp_dec_sub_assume_nonzero(i8 %x) {
|
|||
; CHECK-LABEL: @icmp_dec_sub_assume_nonzero(
|
||||
; CHECK-NEXT: [[Z:%.*]] = icmp ne i8 [[X:%.*]], 0
|
||||
; CHECK-NEXT: call void @llvm.assume(i1 [[Z]])
|
||||
; CHECK-NEXT: [[I:%.*]] = add i8 [[X]], -1
|
||||
; CHECK-NEXT: [[C:%.*]] = icmp ult i8 [[I]], 11
|
||||
; CHECK-NEXT: [[C:%.*]] = icmp ult i8 [[X]], 12
|
||||
; CHECK-NEXT: ret i1 [[C]]
|
||||
;
|
||||
%z = icmp ne i8 %x, 0
|
||||
|
@ -1244,9 +1242,7 @@ define i1 @icmp_dec_sub_assume_nonzero(i8 %x) {
|
|||
|
||||
define i1 @icmp_dec_nonzero(i16 %x) {
|
||||
; CHECK-LABEL: @icmp_dec_nonzero(
|
||||
; CHECK-NEXT: [[O:%.*]] = or i16 [[X:%.*]], 4
|
||||
; CHECK-NEXT: [[I:%.*]] = add nsw i16 [[O]], -1
|
||||
; CHECK-NEXT: [[C:%.*]] = icmp ult i16 [[I]], 7
|
||||
; CHECK-NEXT: [[C:%.*]] = icmp ult i16 [[X:%.*]], 8
|
||||
; CHECK-NEXT: ret i1 [[C]]
|
||||
;
|
||||
%o = or i16 %x, 4
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||
; RUN: opt < %s -S -passes='default<O1>' -mtriple aarch64 -mcpu=cortex-a55 | FileCheck %s -check-prefix=CHECK-A55
|
||||
; RUN: opt < %s -S -passes='default<O1>' -mtriple aarch64 | FileCheck %s -check-prefix=CHECK-GENERIC
|
||||
; RUN: opt < %s -S -passes="default<O1>" -mtriple aarch64 -mcpu=cortex-a55 | FileCheck %s -check-prefix=CHECK-A55
|
||||
; RUN: opt < %s -S -passes="default<O1>" -mtriple aarch64 | FileCheck %s -check-prefix=CHECK-GENERIC
|
||||
|
||||
; Testing that, while runtime unrolling is performed on in-order cores (such as the cortex-a55), it is not performed when -mcpu is not specified
|
||||
define void @runtime_unroll_generic(i32 %arg_0, i32* %arg_1, i16* %arg_2, i16* %arg_3) {
|
||||
|
@ -12,41 +12,40 @@ define void @runtime_unroll_generic(i32 %arg_0, i32* %arg_1, i16* %arg_2, i16* %
|
|||
; CHECK-A55-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i16, i16* [[ARG_2:%.*]], i64 undef
|
||||
; CHECK-A55-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds i16, i16* [[ARG_3:%.*]], i64 undef
|
||||
; CHECK-A55-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds i32, i32* [[ARG_1:%.*]], i64 undef
|
||||
; CHECK-A55-NEXT: [[TMP0:%.*]] = add i32 [[ARG_0]], -1
|
||||
; CHECK-A55-NEXT: [[XTRAITER:%.*]] = and i32 [[ARG_0]], 3
|
||||
; CHECK-A55-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP0]], 3
|
||||
; CHECK-A55-NEXT: br i1 [[TMP1]], label [[FOR_END_LOOPEXIT_UNR_LCSSA:%.*]], label [[FOR_BODY6_LR_PH_NEW:%.*]]
|
||||
; CHECK-A55-NEXT: [[TMP0:%.*]] = icmp ult i32 [[ARG_0]], 4
|
||||
; CHECK-A55-NEXT: br i1 [[TMP0]], label [[FOR_END_LOOPEXIT_UNR_LCSSA:%.*]], label [[FOR_BODY6_LR_PH_NEW:%.*]]
|
||||
; CHECK-A55: for.body6.lr.ph.new:
|
||||
; CHECK-A55-NEXT: [[UNROLL_ITER:%.*]] = and i32 [[ARG_0]], -4
|
||||
; CHECK-A55-NEXT: br label [[FOR_BODY6:%.*]]
|
||||
; CHECK-A55: for.body6:
|
||||
; CHECK-A55-NEXT: [[NITER:%.*]] = phi i32 [ 0, [[FOR_BODY6_LR_PH_NEW]] ], [ [[NITER_NEXT_3:%.*]], [[FOR_BODY6]] ]
|
||||
; CHECK-A55-NEXT: [[TMP2:%.*]] = load i16, i16* [[ARRAYIDX10]], align 2
|
||||
; CHECK-A55-NEXT: [[CONV:%.*]] = sext i16 [[TMP2]] to i32
|
||||
; CHECK-A55-NEXT: [[TMP3:%.*]] = load i16, i16* [[ARRAYIDX14]], align 2
|
||||
; CHECK-A55-NEXT: [[CONV15:%.*]] = sext i16 [[TMP3]] to i32
|
||||
; CHECK-A55-NEXT: [[TMP1:%.*]] = load i16, i16* [[ARRAYIDX10]], align 2
|
||||
; CHECK-A55-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32
|
||||
; CHECK-A55-NEXT: [[TMP2:%.*]] = load i16, i16* [[ARRAYIDX14]], align 2
|
||||
; CHECK-A55-NEXT: [[CONV15:%.*]] = sext i16 [[TMP2]] to i32
|
||||
; CHECK-A55-NEXT: [[MUL16:%.*]] = mul nsw i32 [[CONV15]], [[CONV]]
|
||||
; CHECK-A55-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX20]], align 4
|
||||
; CHECK-A55-NEXT: [[ADD21:%.*]] = add nsw i32 [[MUL16]], [[TMP4]]
|
||||
; CHECK-A55-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX20]], align 4
|
||||
; CHECK-A55-NEXT: [[ADD21:%.*]] = add nsw i32 [[MUL16]], [[TMP3]]
|
||||
; CHECK-A55-NEXT: store i32 [[ADD21]], i32* [[ARRAYIDX20]], align 4
|
||||
; CHECK-A55-NEXT: [[TMP5:%.*]] = load i16, i16* [[ARRAYIDX10]], align 2
|
||||
; CHECK-A55-NEXT: [[CONV_1:%.*]] = sext i16 [[TMP5]] to i32
|
||||
; CHECK-A55-NEXT: [[TMP6:%.*]] = load i16, i16* [[ARRAYIDX14]], align 2
|
||||
; CHECK-A55-NEXT: [[CONV15_1:%.*]] = sext i16 [[TMP6]] to i32
|
||||
; CHECK-A55-NEXT: [[TMP4:%.*]] = load i16, i16* [[ARRAYIDX10]], align 2
|
||||
; CHECK-A55-NEXT: [[CONV_1:%.*]] = sext i16 [[TMP4]] to i32
|
||||
; CHECK-A55-NEXT: [[TMP5:%.*]] = load i16, i16* [[ARRAYIDX14]], align 2
|
||||
; CHECK-A55-NEXT: [[CONV15_1:%.*]] = sext i16 [[TMP5]] to i32
|
||||
; CHECK-A55-NEXT: [[MUL16_1:%.*]] = mul nsw i32 [[CONV15_1]], [[CONV_1]]
|
||||
; CHECK-A55-NEXT: [[ADD21_1:%.*]] = add nsw i32 [[MUL16_1]], [[ADD21]]
|
||||
; CHECK-A55-NEXT: store i32 [[ADD21_1]], i32* [[ARRAYIDX20]], align 4
|
||||
; CHECK-A55-NEXT: [[TMP7:%.*]] = load i16, i16* [[ARRAYIDX10]], align 2
|
||||
; CHECK-A55-NEXT: [[CONV_2:%.*]] = sext i16 [[TMP7]] to i32
|
||||
; CHECK-A55-NEXT: [[TMP8:%.*]] = load i16, i16* [[ARRAYIDX14]], align 2
|
||||
; CHECK-A55-NEXT: [[CONV15_2:%.*]] = sext i16 [[TMP8]] to i32
|
||||
; CHECK-A55-NEXT: [[TMP6:%.*]] = load i16, i16* [[ARRAYIDX10]], align 2
|
||||
; CHECK-A55-NEXT: [[CONV_2:%.*]] = sext i16 [[TMP6]] to i32
|
||||
; CHECK-A55-NEXT: [[TMP7:%.*]] = load i16, i16* [[ARRAYIDX14]], align 2
|
||||
; CHECK-A55-NEXT: [[CONV15_2:%.*]] = sext i16 [[TMP7]] to i32
|
||||
; CHECK-A55-NEXT: [[MUL16_2:%.*]] = mul nsw i32 [[CONV15_2]], [[CONV_2]]
|
||||
; CHECK-A55-NEXT: [[ADD21_2:%.*]] = add nsw i32 [[MUL16_2]], [[ADD21_1]]
|
||||
; CHECK-A55-NEXT: store i32 [[ADD21_2]], i32* [[ARRAYIDX20]], align 4
|
||||
; CHECK-A55-NEXT: [[TMP9:%.*]] = load i16, i16* [[ARRAYIDX10]], align 2
|
||||
; CHECK-A55-NEXT: [[CONV_3:%.*]] = sext i16 [[TMP9]] to i32
|
||||
; CHECK-A55-NEXT: [[TMP10:%.*]] = load i16, i16* [[ARRAYIDX14]], align 2
|
||||
; CHECK-A55-NEXT: [[CONV15_3:%.*]] = sext i16 [[TMP10]] to i32
|
||||
; CHECK-A55-NEXT: [[TMP8:%.*]] = load i16, i16* [[ARRAYIDX10]], align 2
|
||||
; CHECK-A55-NEXT: [[CONV_3:%.*]] = sext i16 [[TMP8]] to i32
|
||||
; CHECK-A55-NEXT: [[TMP9:%.*]] = load i16, i16* [[ARRAYIDX14]], align 2
|
||||
; CHECK-A55-NEXT: [[CONV15_3:%.*]] = sext i16 [[TMP9]] to i32
|
||||
; CHECK-A55-NEXT: [[MUL16_3:%.*]] = mul nsw i32 [[CONV15_3]], [[CONV_3]]
|
||||
; CHECK-A55-NEXT: [[ADD21_3:%.*]] = add nsw i32 [[MUL16_3]], [[ADD21_2]]
|
||||
; CHECK-A55-NEXT: store i32 [[ADD21_3]], i32* [[ARRAYIDX20]], align 4
|
||||
|
@ -57,35 +56,35 @@ define void @runtime_unroll_generic(i32 %arg_0, i32* %arg_1, i16* %arg_2, i16* %
|
|||
; CHECK-A55-NEXT: [[LCMP_MOD_NOT:%.*]] = icmp eq i32 [[XTRAITER]], 0
|
||||
; CHECK-A55-NEXT: br i1 [[LCMP_MOD_NOT]], label [[FOR_END]], label [[FOR_BODY6_EPIL:%.*]]
|
||||
; CHECK-A55: for.body6.epil:
|
||||
; CHECK-A55-NEXT: [[TMP11:%.*]] = load i16, i16* [[ARRAYIDX10]], align 2
|
||||
; CHECK-A55-NEXT: [[CONV_EPIL:%.*]] = sext i16 [[TMP11]] to i32
|
||||
; CHECK-A55-NEXT: [[TMP12:%.*]] = load i16, i16* [[ARRAYIDX14]], align 2
|
||||
; CHECK-A55-NEXT: [[CONV15_EPIL:%.*]] = sext i16 [[TMP12]] to i32
|
||||
; CHECK-A55-NEXT: [[TMP10:%.*]] = load i16, i16* [[ARRAYIDX10]], align 2
|
||||
; CHECK-A55-NEXT: [[CONV_EPIL:%.*]] = sext i16 [[TMP10]] to i32
|
||||
; CHECK-A55-NEXT: [[TMP11:%.*]] = load i16, i16* [[ARRAYIDX14]], align 2
|
||||
; CHECK-A55-NEXT: [[CONV15_EPIL:%.*]] = sext i16 [[TMP11]] to i32
|
||||
; CHECK-A55-NEXT: [[MUL16_EPIL:%.*]] = mul nsw i32 [[CONV15_EPIL]], [[CONV_EPIL]]
|
||||
; CHECK-A55-NEXT: [[TMP13:%.*]] = load i32, i32* [[ARRAYIDX20]], align 4
|
||||
; CHECK-A55-NEXT: [[ADD21_EPIL:%.*]] = add nsw i32 [[MUL16_EPIL]], [[TMP13]]
|
||||
; CHECK-A55-NEXT: [[TMP12:%.*]] = load i32, i32* [[ARRAYIDX20]], align 4
|
||||
; CHECK-A55-NEXT: [[ADD21_EPIL:%.*]] = add nsw i32 [[MUL16_EPIL]], [[TMP12]]
|
||||
; CHECK-A55-NEXT: store i32 [[ADD21_EPIL]], i32* [[ARRAYIDX20]], align 4
|
||||
; CHECK-A55-NEXT: [[EPIL_ITER_CMP_NOT:%.*]] = icmp eq i32 [[XTRAITER]], 1
|
||||
; CHECK-A55-NEXT: br i1 [[EPIL_ITER_CMP_NOT]], label [[FOR_END]], label [[FOR_BODY6_EPIL_1:%.*]]
|
||||
; CHECK-A55: for.body6.epil.1:
|
||||
; CHECK-A55-NEXT: [[TMP14:%.*]] = load i16, i16* [[ARRAYIDX10]], align 2
|
||||
; CHECK-A55-NEXT: [[CONV_EPIL_1:%.*]] = sext i16 [[TMP14]] to i32
|
||||
; CHECK-A55-NEXT: [[TMP15:%.*]] = load i16, i16* [[ARRAYIDX14]], align 2
|
||||
; CHECK-A55-NEXT: [[CONV15_EPIL_1:%.*]] = sext i16 [[TMP15]] to i32
|
||||
; CHECK-A55-NEXT: [[TMP13:%.*]] = load i16, i16* [[ARRAYIDX10]], align 2
|
||||
; CHECK-A55-NEXT: [[CONV_EPIL_1:%.*]] = sext i16 [[TMP13]] to i32
|
||||
; CHECK-A55-NEXT: [[TMP14:%.*]] = load i16, i16* [[ARRAYIDX14]], align 2
|
||||
; CHECK-A55-NEXT: [[CONV15_EPIL_1:%.*]] = sext i16 [[TMP14]] to i32
|
||||
; CHECK-A55-NEXT: [[MUL16_EPIL_1:%.*]] = mul nsw i32 [[CONV15_EPIL_1]], [[CONV_EPIL_1]]
|
||||
; CHECK-A55-NEXT: [[TMP16:%.*]] = load i32, i32* [[ARRAYIDX20]], align 4
|
||||
; CHECK-A55-NEXT: [[ADD21_EPIL_1:%.*]] = add nsw i32 [[MUL16_EPIL_1]], [[TMP16]]
|
||||
; CHECK-A55-NEXT: [[TMP15:%.*]] = load i32, i32* [[ARRAYIDX20]], align 4
|
||||
; CHECK-A55-NEXT: [[ADD21_EPIL_1:%.*]] = add nsw i32 [[MUL16_EPIL_1]], [[TMP15]]
|
||||
; CHECK-A55-NEXT: store i32 [[ADD21_EPIL_1]], i32* [[ARRAYIDX20]], align 4
|
||||
; CHECK-A55-NEXT: [[EPIL_ITER_CMP_1_NOT:%.*]] = icmp eq i32 [[XTRAITER]], 2
|
||||
; CHECK-A55-NEXT: br i1 [[EPIL_ITER_CMP_1_NOT]], label [[FOR_END]], label [[FOR_BODY6_EPIL_2:%.*]]
|
||||
; CHECK-A55: for.body6.epil.2:
|
||||
; CHECK-A55-NEXT: [[TMP17:%.*]] = load i16, i16* [[ARRAYIDX10]], align 2
|
||||
; CHECK-A55-NEXT: [[CONV_EPIL_2:%.*]] = sext i16 [[TMP17]] to i32
|
||||
; CHECK-A55-NEXT: [[TMP18:%.*]] = load i16, i16* [[ARRAYIDX14]], align 2
|
||||
; CHECK-A55-NEXT: [[CONV15_EPIL_2:%.*]] = sext i16 [[TMP18]] to i32
|
||||
; CHECK-A55-NEXT: [[TMP16:%.*]] = load i16, i16* [[ARRAYIDX10]], align 2
|
||||
; CHECK-A55-NEXT: [[CONV_EPIL_2:%.*]] = sext i16 [[TMP16]] to i32
|
||||
; CHECK-A55-NEXT: [[TMP17:%.*]] = load i16, i16* [[ARRAYIDX14]], align 2
|
||||
; CHECK-A55-NEXT: [[CONV15_EPIL_2:%.*]] = sext i16 [[TMP17]] to i32
|
||||
; CHECK-A55-NEXT: [[MUL16_EPIL_2:%.*]] = mul nsw i32 [[CONV15_EPIL_2]], [[CONV_EPIL_2]]
|
||||
; CHECK-A55-NEXT: [[TMP19:%.*]] = load i32, i32* [[ARRAYIDX20]], align 4
|
||||
; CHECK-A55-NEXT: [[ADD21_EPIL_2:%.*]] = add nsw i32 [[MUL16_EPIL_2]], [[TMP19]]
|
||||
; CHECK-A55-NEXT: [[TMP18:%.*]] = load i32, i32* [[ARRAYIDX20]], align 4
|
||||
; CHECK-A55-NEXT: [[ADD21_EPIL_2:%.*]] = add nsw i32 [[MUL16_EPIL_2]], [[TMP18]]
|
||||
; CHECK-A55-NEXT: store i32 [[ADD21_EPIL_2]], i32* [[ARRAYIDX20]], align 4
|
||||
; CHECK-A55-NEXT: br label [[FOR_END]]
|
||||
; CHECK-A55: for.end:
|
||||
|
|
|
@ -9,10 +9,9 @@ define i32 @unroll(i32* nocapture readonly %a, i32* nocapture readonly %b, i32 %
|
|||
; CHECK-NEXT: br i1 [[CMP9]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY_LR_PH:%.*]]
|
||||
; CHECK: for.body.lr.ph:
|
||||
; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = add nsw i64 [[WIDE_TRIP_COUNT]], -1
|
||||
; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[WIDE_TRIP_COUNT]], 3
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 3
|
||||
; CHECK-NEXT: br i1 [[TMP1]], label [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA:%.*]], label [[FOR_BODY_LR_PH_NEW:%.*]]
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i32 [[N]], 4
|
||||
; CHECK-NEXT: br i1 [[TMP0]], label [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA:%.*]], label [[FOR_BODY_LR_PH_NEW:%.*]]
|
||||
; CHECK: for.body.lr.ph.new:
|
||||
; CHECK-NEXT: [[UNROLL_ITER:%.*]] = and i64 [[WIDE_TRIP_COUNT]], 4294967292
|
||||
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
||||
|
@ -28,30 +27,30 @@ define i32 @unroll(i32* nocapture readonly %a, i32* nocapture readonly %b, i32 %
|
|||
; CHECK-NEXT: br label [[FOR_BODY_EPIL:%.*]]
|
||||
; CHECK: for.body.epil:
|
||||
; CHECK-NEXT: [[ARRAYIDX_EPIL:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV_UNR]]
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX_EPIL]], align 4
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[ARRAYIDX_EPIL]], align 4
|
||||
; CHECK-NEXT: [[ARRAYIDX2_EPIL:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDVARS_IV_UNR]]
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX2_EPIL]], align 4
|
||||
; CHECK-NEXT: [[MUL_EPIL:%.*]] = mul nsw i32 [[TMP3]], [[TMP2]]
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX2_EPIL]], align 4
|
||||
; CHECK-NEXT: [[MUL_EPIL:%.*]] = mul nsw i32 [[TMP2]], [[TMP1]]
|
||||
; CHECK-NEXT: [[ADD_EPIL:%.*]] = add nsw i32 [[MUL_EPIL]], [[C_010_UNR]]
|
||||
; CHECK-NEXT: [[EPIL_ITER_CMP_NOT:%.*]] = icmp eq i64 [[XTRAITER]], 1
|
||||
; CHECK-NEXT: br i1 [[EPIL_ITER_CMP_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT_EPILOG_LCSSA:%.*]], label [[FOR_BODY_EPIL_1:%.*]]
|
||||
; CHECK: for.body.epil.1:
|
||||
; CHECK-NEXT: [[INDVARS_IV_NEXT_EPIL:%.*]] = add nuw nsw i64 [[INDVARS_IV_UNR]], 1
|
||||
; CHECK-NEXT: [[ARRAYIDX_EPIL_1:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_EPIL]]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX_EPIL_1]], align 4
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX_EPIL_1]], align 4
|
||||
; CHECK-NEXT: [[ARRAYIDX2_EPIL_1:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV_NEXT_EPIL]]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX2_EPIL_1]], align 4
|
||||
; CHECK-NEXT: [[MUL_EPIL_1:%.*]] = mul nsw i32 [[TMP5]], [[TMP4]]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX2_EPIL_1]], align 4
|
||||
; CHECK-NEXT: [[MUL_EPIL_1:%.*]] = mul nsw i32 [[TMP4]], [[TMP3]]
|
||||
; CHECK-NEXT: [[ADD_EPIL_1:%.*]] = add nsw i32 [[MUL_EPIL_1]], [[ADD_EPIL]]
|
||||
; CHECK-NEXT: [[EPIL_ITER_CMP_1_NOT:%.*]] = icmp eq i64 [[XTRAITER]], 2
|
||||
; CHECK-NEXT: br i1 [[EPIL_ITER_CMP_1_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT_EPILOG_LCSSA]], label [[FOR_BODY_EPIL_2:%.*]]
|
||||
; CHECK: for.body.epil.2:
|
||||
; CHECK-NEXT: [[INDVARS_IV_NEXT_EPIL_1:%.*]] = add nuw nsw i64 [[INDVARS_IV_UNR]], 2
|
||||
; CHECK-NEXT: [[ARRAYIDX_EPIL_2:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_EPIL_1]]
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* [[ARRAYIDX_EPIL_2]], align 4
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX_EPIL_2]], align 4
|
||||
; CHECK-NEXT: [[ARRAYIDX2_EPIL_2:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV_NEXT_EPIL_1]]
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* [[ARRAYIDX2_EPIL_2]], align 4
|
||||
; CHECK-NEXT: [[MUL_EPIL_2:%.*]] = mul nsw i32 [[TMP7]], [[TMP6]]
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* [[ARRAYIDX2_EPIL_2]], align 4
|
||||
; CHECK-NEXT: [[MUL_EPIL_2:%.*]] = mul nsw i32 [[TMP6]], [[TMP5]]
|
||||
; CHECK-NEXT: [[ADD_EPIL_2:%.*]] = add nsw i32 [[MUL_EPIL_2]], [[ADD_EPIL_1]]
|
||||
; CHECK-NEXT: br label [[FOR_COND_CLEANUP_LOOPEXIT_EPILOG_LCSSA]]
|
||||
; CHECK: for.cond.cleanup.loopexit.epilog-lcssa:
|
||||
|
@ -68,31 +67,31 @@ define i32 @unroll(i32* nocapture readonly %a, i32* nocapture readonly %b, i32 %
|
|||
; CHECK-NEXT: [[C_010:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH_NEW]] ], [ [[ADD_3]], [[FOR_BODY]] ]
|
||||
; CHECK-NEXT: [[NITER:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH_NEW]] ], [ [[NITER_NEXT_3:%.*]], [[FOR_BODY]] ]
|
||||
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]]
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
|
||||
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]]
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = load i32, i32* [[ARRAYIDX2]], align 4
|
||||
; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], [[TMP8]]
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* [[ARRAYIDX2]], align 4
|
||||
; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], [[TMP7]]
|
||||
; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[MUL]], [[C_010]]
|
||||
; CHECK-NEXT: [[INDVARS_IV_NEXT:%.*]] = or i64 [[INDVARS_IV]], 1
|
||||
; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT]]
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = load i32, i32* [[ARRAYIDX_1]], align 4
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = load i32, i32* [[ARRAYIDX_1]], align 4
|
||||
; CHECK-NEXT: [[ARRAYIDX2_1:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV_NEXT]]
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX2_1]], align 4
|
||||
; CHECK-NEXT: [[MUL_1:%.*]] = mul nsw i32 [[TMP11]], [[TMP10]]
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = load i32, i32* [[ARRAYIDX2_1]], align 4
|
||||
; CHECK-NEXT: [[MUL_1:%.*]] = mul nsw i32 [[TMP10]], [[TMP9]]
|
||||
; CHECK-NEXT: [[ADD_1:%.*]] = add nsw i32 [[MUL_1]], [[ADD]]
|
||||
; CHECK-NEXT: [[INDVARS_IV_NEXT_1:%.*]] = or i64 [[INDVARS_IV]], 2
|
||||
; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_1]]
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = load i32, i32* [[ARRAYIDX_2]], align 4
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX_2]], align 4
|
||||
; CHECK-NEXT: [[ARRAYIDX2_2:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV_NEXT_1]]
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = load i32, i32* [[ARRAYIDX2_2]], align 4
|
||||
; CHECK-NEXT: [[MUL_2:%.*]] = mul nsw i32 [[TMP13]], [[TMP12]]
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = load i32, i32* [[ARRAYIDX2_2]], align 4
|
||||
; CHECK-NEXT: [[MUL_2:%.*]] = mul nsw i32 [[TMP12]], [[TMP11]]
|
||||
; CHECK-NEXT: [[ADD_2:%.*]] = add nsw i32 [[MUL_2]], [[ADD_1]]
|
||||
; CHECK-NEXT: [[INDVARS_IV_NEXT_2:%.*]] = or i64 [[INDVARS_IV]], 3
|
||||
; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_2]]
|
||||
; CHECK-NEXT: [[TMP14:%.*]] = load i32, i32* [[ARRAYIDX_3]], align 4
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = load i32, i32* [[ARRAYIDX_3]], align 4
|
||||
; CHECK-NEXT: [[ARRAYIDX2_3:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV_NEXT_2]]
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = load i32, i32* [[ARRAYIDX2_3]], align 4
|
||||
; CHECK-NEXT: [[MUL_3:%.*]] = mul nsw i32 [[TMP15]], [[TMP14]]
|
||||
; CHECK-NEXT: [[TMP14:%.*]] = load i32, i32* [[ARRAYIDX2_3]], align 4
|
||||
; CHECK-NEXT: [[MUL_3:%.*]] = mul nsw i32 [[TMP14]], [[TMP13]]
|
||||
; CHECK-NEXT: [[ADD_3]] = add nsw i32 [[MUL_3]], [[ADD_2]]
|
||||
; CHECK-NEXT: [[INDVARS_IV_NEXT_3]] = add nuw nsw i64 [[INDVARS_IV]], 4
|
||||
; CHECK-NEXT: [[NITER_NEXT_3]] = add i64 [[NITER]], 4
|
||||
|
|
|
@ -21,7 +21,7 @@ define void @saddsat(i16* nocapture readonly %pSrc, i16 signext %offset, i16* no
|
|||
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[BLOCKSIZE]], -1
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
|
||||
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 15
|
||||
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[BLOCKSIZE]], 16
|
||||
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
|
||||
; CHECK: vector.ph:
|
||||
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], -16
|
||||
|
@ -113,10 +113,10 @@ define void @umin(i8* nocapture readonly %pSrc, i8 signext %offset, i8* nocaptur
|
|||
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[BLOCKSIZE]], -1
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
|
||||
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 7
|
||||
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[BLOCKSIZE]], 8
|
||||
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
|
||||
; CHECK: vector.main.loop.iter.check:
|
||||
; CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i32 [[TMP0]], 31
|
||||
; CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i32 [[BLOCKSIZE]], 32
|
||||
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
|
||||
; CHECK: vector.ph:
|
||||
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], -32
|
||||
|
|
|
@ -1359,7 +1359,7 @@ define i32 @reduction_interleave_group(i32 %n, i32* %arr) #0 {
|
|||
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[TMP0]], 1
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = add nuw i32 [[TMP1]], 1
|
||||
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 6
|
||||
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 7
|
||||
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
|
||||
; CHECK: vector.ph:
|
||||
; CHECK-NEXT: [[N_VEC:%.*]] = and i32 [[TMP2]], -4
|
||||
|
|
|
@ -198,10 +198,9 @@ define void @fp_iv_loop2(float* noalias nocapture %A, i32 %N) {
|
|||
; AUTO_VEC-NEXT: br i1 [[CMP4]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]]
|
||||
; AUTO_VEC: for.body.preheader:
|
||||
; AUTO_VEC-NEXT: [[ZEXT:%.*]] = zext i32 [[N]] to i64
|
||||
; AUTO_VEC-NEXT: [[TMP0:%.*]] = add nsw i64 [[ZEXT]], -1
|
||||
; AUTO_VEC-NEXT: [[XTRAITER:%.*]] = and i64 [[ZEXT]], 7
|
||||
; AUTO_VEC-NEXT: [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 7
|
||||
; AUTO_VEC-NEXT: br i1 [[TMP1]], label [[FOR_END_LOOPEXIT_UNR_LCSSA:%.*]], label [[FOR_BODY_PREHEADER_NEW:%.*]]
|
||||
; AUTO_VEC-NEXT: [[TMP0:%.*]] = icmp ult i32 [[N]], 8
|
||||
; AUTO_VEC-NEXT: br i1 [[TMP0]], label [[FOR_END_LOOPEXIT_UNR_LCSSA:%.*]], label [[FOR_BODY_PREHEADER_NEW:%.*]]
|
||||
; AUTO_VEC: for.body.preheader.new:
|
||||
; AUTO_VEC-NEXT: [[UNROLL_ITER:%.*]] = and i64 [[ZEXT]], 4294967288
|
||||
; AUTO_VEC-NEXT: br label [[FOR_BODY:%.*]]
|
||||
|
|
|
@ -22,10 +22,10 @@ define void @uaddsat(i16* nocapture readonly %pSrc, i16 signext %offset, i16* no
|
|||
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[BLOCKSIZE]], -1
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
|
||||
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 7
|
||||
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[BLOCKSIZE]], 8
|
||||
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
|
||||
; CHECK: vector.main.loop.iter.check:
|
||||
; CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i32 [[TMP0]], 63
|
||||
; CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i32 [[BLOCKSIZE]], 64
|
||||
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
|
||||
; CHECK: vector.ph:
|
||||
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], -64
|
||||
|
@ -165,10 +165,10 @@ define void @cttz(i8* nocapture readonly %pSrc, i8 signext %offset, i8* nocaptur
|
|||
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[BLOCKSIZE]], -1
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
|
||||
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 15
|
||||
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[BLOCKSIZE]], 16
|
||||
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
|
||||
; CHECK: vector.main.loop.iter.check:
|
||||
; CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i32 [[TMP0]], 127
|
||||
; CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i32 [[BLOCKSIZE]], 128
|
||||
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
|
||||
; CHECK: vector.ph:
|
||||
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], -128
|
||||
|
|
|
@ -27,7 +27,7 @@ define void @fp_iv_loop1_fast_FMF(float %init, float* noalias nocapture %A, i32
|
|||
; VEC4_INTERL1-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1
|
||||
; VEC4_INTERL1-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
|
||||
; VEC4_INTERL1-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
|
||||
; VEC4_INTERL1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 3
|
||||
; VEC4_INTERL1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 4
|
||||
; VEC4_INTERL1-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
|
||||
; VEC4_INTERL1: vector.ph:
|
||||
; VEC4_INTERL1-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], -4
|
||||
|
@ -85,7 +85,7 @@ define void @fp_iv_loop1_fast_FMF(float %init, float* noalias nocapture %A, i32
|
|||
; VEC4_INTERL2-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1
|
||||
; VEC4_INTERL2-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
|
||||
; VEC4_INTERL2-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
|
||||
; VEC4_INTERL2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 7
|
||||
; VEC4_INTERL2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 8
|
||||
; VEC4_INTERL2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
|
||||
; VEC4_INTERL2: vector.ph:
|
||||
; VEC4_INTERL2-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], -8
|
||||
|
@ -286,7 +286,7 @@ define void @fp_iv_loop1_reassoc_FMF(float %init, float* noalias nocapture %A, i
|
|||
; VEC4_INTERL1-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1
|
||||
; VEC4_INTERL1-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
|
||||
; VEC4_INTERL1-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
|
||||
; VEC4_INTERL1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 3
|
||||
; VEC4_INTERL1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 4
|
||||
; VEC4_INTERL1-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
|
||||
; VEC4_INTERL1: vector.ph:
|
||||
; VEC4_INTERL1-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], -4
|
||||
|
@ -344,7 +344,7 @@ define void @fp_iv_loop1_reassoc_FMF(float %init, float* noalias nocapture %A, i
|
|||
; VEC4_INTERL2-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1
|
||||
; VEC4_INTERL2-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
|
||||
; VEC4_INTERL2-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
|
||||
; VEC4_INTERL2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 7
|
||||
; VEC4_INTERL2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 8
|
||||
; VEC4_INTERL2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
|
||||
; VEC4_INTERL2: vector.ph:
|
||||
; VEC4_INTERL2-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], -8
|
||||
|
@ -548,7 +548,7 @@ define void @fp_iv_loop2(float %init, float* noalias nocapture %A, i32 %N) #0 {
|
|||
; VEC4_INTERL1-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1
|
||||
; VEC4_INTERL1-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
|
||||
; VEC4_INTERL1-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
|
||||
; VEC4_INTERL1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 3
|
||||
; VEC4_INTERL1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 4
|
||||
; VEC4_INTERL1-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
|
||||
; VEC4_INTERL1: vector.ph:
|
||||
; VEC4_INTERL1-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], -4
|
||||
|
@ -599,7 +599,7 @@ define void @fp_iv_loop2(float %init, float* noalias nocapture %A, i32 %N) #0 {
|
|||
; VEC4_INTERL2-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1
|
||||
; VEC4_INTERL2-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
|
||||
; VEC4_INTERL2-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
|
||||
; VEC4_INTERL2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 7
|
||||
; VEC4_INTERL2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 8
|
||||
; VEC4_INTERL2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
|
||||
; VEC4_INTERL2: vector.ph:
|
||||
; VEC4_INTERL2-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], -8
|
||||
|
@ -792,7 +792,7 @@ define void @fp_iv_loop3(float %init, float* noalias nocapture %A, float* noalia
|
|||
; VEC4_INTERL1-NEXT: [[TMP1:%.*]] = add i32 [[N]], -1
|
||||
; VEC4_INTERL1-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
|
||||
; VEC4_INTERL1-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
|
||||
; VEC4_INTERL1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP1]], 3
|
||||
; VEC4_INTERL1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 4
|
||||
; VEC4_INTERL1-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
|
||||
; VEC4_INTERL1: vector.ph:
|
||||
; VEC4_INTERL1-NEXT: [[N_VEC:%.*]] = and i64 [[TMP3]], -4
|
||||
|
@ -874,7 +874,7 @@ define void @fp_iv_loop3(float %init, float* noalias nocapture %A, float* noalia
|
|||
; VEC4_INTERL2-NEXT: [[TMP1:%.*]] = add i32 [[N]], -1
|
||||
; VEC4_INTERL2-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
|
||||
; VEC4_INTERL2-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
|
||||
; VEC4_INTERL2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP1]], 7
|
||||
; VEC4_INTERL2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 8
|
||||
; VEC4_INTERL2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
|
||||
; VEC4_INTERL2: vector.ph:
|
||||
; VEC4_INTERL2-NEXT: [[N_VEC:%.*]] = and i64 [[TMP3]], -8
|
||||
|
@ -1169,7 +1169,7 @@ define void @fp_iv_loop4(float* noalias nocapture %A, i32 %N) {
|
|||
; VEC4_INTERL1-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1
|
||||
; VEC4_INTERL1-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
|
||||
; VEC4_INTERL1-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
|
||||
; VEC4_INTERL1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 3
|
||||
; VEC4_INTERL1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 4
|
||||
; VEC4_INTERL1-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
|
||||
; VEC4_INTERL1: vector.ph:
|
||||
; VEC4_INTERL1-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], -4
|
||||
|
@ -1217,7 +1217,7 @@ define void @fp_iv_loop4(float* noalias nocapture %A, i32 %N) {
|
|||
; VEC4_INTERL2-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1
|
||||
; VEC4_INTERL2-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
|
||||
; VEC4_INTERL2-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
|
||||
; VEC4_INTERL2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 7
|
||||
; VEC4_INTERL2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 8
|
||||
; VEC4_INTERL2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
|
||||
; VEC4_INTERL2: vector.ph:
|
||||
; VEC4_INTERL2-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], -8
|
||||
|
|
|
@ -12,7 +12,7 @@ define i32 @foo(i32* nocapture %A, i32* nocapture %B, i32 %n) {
|
|||
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
|
||||
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 3
|
||||
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 4
|
||||
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
|
||||
; CHECK: vector.memcheck:
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[N]], -1
|
||||
|
|
|
@ -4576,8 +4576,7 @@ define void @trunciv(i32* nocapture %a, i32 %start, i64 %k) {
|
|||
; IND-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[K:%.*]], 2
|
||||
; IND-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
|
||||
; IND: vector.scevcheck:
|
||||
; IND-NEXT: [[TMP0:%.*]] = add i64 [[K]], -1
|
||||
; IND-NEXT: [[DOTNOT:%.*]] = icmp ult i64 [[TMP0]], 2147483648
|
||||
; IND-NEXT: [[DOTNOT:%.*]] = icmp ult i64 [[K]], 2147483649
|
||||
; IND-NEXT: br i1 [[DOTNOT]], label [[VECTOR_PH:%.*]], label [[SCALAR_PH]]
|
||||
; IND: vector.ph:
|
||||
; IND-NEXT: [[N_VEC:%.*]] = and i64 [[K]], -2
|
||||
|
@ -4586,14 +4585,14 @@ define void @trunciv(i32* nocapture %a, i32 %start, i64 %k) {
|
|||
; IND-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; IND-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; IND-NEXT: [[SEXT:%.*]] = shl i64 [[INDEX]], 32
|
||||
; IND-NEXT: [[TMP1:%.*]] = ashr exact i64 [[SEXT]], 32
|
||||
; IND-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP1]]
|
||||
; IND-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <2 x i32>*
|
||||
; IND-NEXT: store <2 x i32> [[VEC_IND]], <2 x i32>* [[TMP3]], align 4
|
||||
; IND-NEXT: [[TMP0:%.*]] = ashr exact i64 [[SEXT]], 32
|
||||
; IND-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP0]]
|
||||
; IND-NEXT: [[TMP2:%.*]] = bitcast i32* [[TMP1]] to <2 x i32>*
|
||||
; IND-NEXT: store <2 x i32> [[VEC_IND]], <2 x i32>* [[TMP2]], align 4
|
||||
; IND-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
|
||||
; IND-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
|
||||
; IND-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
|
||||
; IND-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP42:![0-9]+]]
|
||||
; IND-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
|
||||
; IND-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP42:![0-9]+]]
|
||||
; IND: middle.block:
|
||||
; IND-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[K]]
|
||||
; IND-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
|
||||
|
@ -4604,8 +4603,8 @@ define void @trunciv(i32* nocapture %a, i32 %start, i64 %k) {
|
|||
; IND-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
|
||||
; IND-NEXT: [[TRUNC_IV:%.*]] = trunc i64 [[INDVARS_IV]] to i32
|
||||
; IND-NEXT: [[SEXT1:%.*]] = shl i64 [[INDVARS_IV]], 32
|
||||
; IND-NEXT: [[TMP5:%.*]] = ashr exact i64 [[SEXT1]], 32
|
||||
; IND-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP5]]
|
||||
; IND-NEXT: [[TMP4:%.*]] = ashr exact i64 [[SEXT1]], 32
|
||||
; IND-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP4]]
|
||||
; IND-NEXT: store i32 [[TRUNC_IV]], i32* [[ARRAYIDX]], align 4
|
||||
; IND-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
|
||||
; IND-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[K]]
|
||||
|
@ -4618,8 +4617,7 @@ define void @trunciv(i32* nocapture %a, i32 %start, i64 %k) {
|
|||
; UNROLL-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[K:%.*]], 4
|
||||
; UNROLL-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
|
||||
; UNROLL: vector.scevcheck:
|
||||
; UNROLL-NEXT: [[TMP0:%.*]] = add i64 [[K]], -1
|
||||
; UNROLL-NEXT: [[DOTNOT:%.*]] = icmp ult i64 [[TMP0]], 2147483648
|
||||
; UNROLL-NEXT: [[DOTNOT:%.*]] = icmp ult i64 [[K]], 2147483649
|
||||
; UNROLL-NEXT: br i1 [[DOTNOT]], label [[VECTOR_PH:%.*]], label [[SCALAR_PH]]
|
||||
; UNROLL: vector.ph:
|
||||
; UNROLL-NEXT: [[N_VEC:%.*]] = and i64 [[K]], -4
|
||||
|
@ -4629,17 +4627,17 @@ define void @trunciv(i32* nocapture %a, i32 %start, i64 %k) {
|
|||
; UNROLL-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; UNROLL-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
|
||||
; UNROLL-NEXT: [[SEXT:%.*]] = shl i64 [[INDEX]], 32
|
||||
; UNROLL-NEXT: [[TMP1:%.*]] = ashr exact i64 [[SEXT]], 32
|
||||
; UNROLL-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP1]]
|
||||
; UNROLL-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <2 x i32>*
|
||||
; UNROLL-NEXT: store <2 x i32> [[VEC_IND]], <2 x i32>* [[TMP3]], align 4
|
||||
; UNROLL-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 2
|
||||
; UNROLL-NEXT: [[TMP5:%.*]] = bitcast i32* [[TMP4]] to <2 x i32>*
|
||||
; UNROLL-NEXT: store <2 x i32> [[STEP_ADD]], <2 x i32>* [[TMP5]], align 4
|
||||
; UNROLL-NEXT: [[TMP0:%.*]] = ashr exact i64 [[SEXT]], 32
|
||||
; UNROLL-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP0]]
|
||||
; UNROLL-NEXT: [[TMP2:%.*]] = bitcast i32* [[TMP1]] to <2 x i32>*
|
||||
; UNROLL-NEXT: store <2 x i32> [[VEC_IND]], <2 x i32>* [[TMP2]], align 4
|
||||
; UNROLL-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 2
|
||||
; UNROLL-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <2 x i32>*
|
||||
; UNROLL-NEXT: store <2 x i32> [[STEP_ADD]], <2 x i32>* [[TMP4]], align 4
|
||||
; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
|
||||
; UNROLL-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 4, i32 4>
|
||||
; UNROLL-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
|
||||
; UNROLL-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP42:![0-9]+]]
|
||||
; UNROLL-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
|
||||
; UNROLL-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP42:![0-9]+]]
|
||||
; UNROLL: middle.block:
|
||||
; UNROLL-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[K]]
|
||||
; UNROLL-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
|
||||
|
@ -4650,8 +4648,8 @@ define void @trunciv(i32* nocapture %a, i32 %start, i64 %k) {
|
|||
; UNROLL-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
|
||||
; UNROLL-NEXT: [[TRUNC_IV:%.*]] = trunc i64 [[INDVARS_IV]] to i32
|
||||
; UNROLL-NEXT: [[SEXT2:%.*]] = shl i64 [[INDVARS_IV]], 32
|
||||
; UNROLL-NEXT: [[TMP7:%.*]] = ashr exact i64 [[SEXT2]], 32
|
||||
; UNROLL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP7]]
|
||||
; UNROLL-NEXT: [[TMP6:%.*]] = ashr exact i64 [[SEXT2]], 32
|
||||
; UNROLL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP6]]
|
||||
; UNROLL-NEXT: store i32 [[TRUNC_IV]], i32* [[ARRAYIDX]], align 4
|
||||
; UNROLL-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
|
||||
; UNROLL-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[K]]
|
||||
|
@ -4715,8 +4713,7 @@ define void @trunciv(i32* nocapture %a, i32 %start, i64 %k) {
|
|||
; INTERLEAVE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[K:%.*]], 8
|
||||
; INTERLEAVE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
|
||||
; INTERLEAVE: vector.scevcheck:
|
||||
; INTERLEAVE-NEXT: [[TMP0:%.*]] = add i64 [[K]], -1
|
||||
; INTERLEAVE-NEXT: [[DOTNOT:%.*]] = icmp ult i64 [[TMP0]], 2147483648
|
||||
; INTERLEAVE-NEXT: [[DOTNOT:%.*]] = icmp ult i64 [[K]], 2147483649
|
||||
; INTERLEAVE-NEXT: br i1 [[DOTNOT]], label [[VECTOR_PH:%.*]], label [[SCALAR_PH]]
|
||||
; INTERLEAVE: vector.ph:
|
||||
; INTERLEAVE-NEXT: [[N_VEC:%.*]] = and i64 [[K]], -8
|
||||
|
@ -4726,17 +4723,17 @@ define void @trunciv(i32* nocapture %a, i32 %start, i64 %k) {
|
|||
; INTERLEAVE-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; INTERLEAVE-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
|
||||
; INTERLEAVE-NEXT: [[SEXT:%.*]] = shl i64 [[INDEX]], 32
|
||||
; INTERLEAVE-NEXT: [[TMP1:%.*]] = ashr exact i64 [[SEXT]], 32
|
||||
; INTERLEAVE-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP1]]
|
||||
; INTERLEAVE-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>*
|
||||
; INTERLEAVE-NEXT: store <4 x i32> [[VEC_IND]], <4 x i32>* [[TMP3]], align 4
|
||||
; INTERLEAVE-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 4
|
||||
; INTERLEAVE-NEXT: [[TMP5:%.*]] = bitcast i32* [[TMP4]] to <4 x i32>*
|
||||
; INTERLEAVE-NEXT: store <4 x i32> [[STEP_ADD]], <4 x i32>* [[TMP5]], align 4
|
||||
; INTERLEAVE-NEXT: [[TMP0:%.*]] = ashr exact i64 [[SEXT]], 32
|
||||
; INTERLEAVE-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP0]]
|
||||
; INTERLEAVE-NEXT: [[TMP2:%.*]] = bitcast i32* [[TMP1]] to <4 x i32>*
|
||||
; INTERLEAVE-NEXT: store <4 x i32> [[VEC_IND]], <4 x i32>* [[TMP2]], align 4
|
||||
; INTERLEAVE-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 4
|
||||
; INTERLEAVE-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <4 x i32>*
|
||||
; INTERLEAVE-NEXT: store <4 x i32> [[STEP_ADD]], <4 x i32>* [[TMP4]], align 4
|
||||
; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
|
||||
; INTERLEAVE-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 8, i32 8, i32 8, i32 8>
|
||||
; INTERLEAVE-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
|
||||
; INTERLEAVE-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP42:![0-9]+]]
|
||||
; INTERLEAVE-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
|
||||
; INTERLEAVE-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP42:![0-9]+]]
|
||||
; INTERLEAVE: middle.block:
|
||||
; INTERLEAVE-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[K]]
|
||||
; INTERLEAVE-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
|
||||
|
@ -4747,8 +4744,8 @@ define void @trunciv(i32* nocapture %a, i32 %start, i64 %k) {
|
|||
; INTERLEAVE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
|
||||
; INTERLEAVE-NEXT: [[TRUNC_IV:%.*]] = trunc i64 [[INDVARS_IV]] to i32
|
||||
; INTERLEAVE-NEXT: [[SEXT2:%.*]] = shl i64 [[INDVARS_IV]], 32
|
||||
; INTERLEAVE-NEXT: [[TMP7:%.*]] = ashr exact i64 [[SEXT2]], 32
|
||||
; INTERLEAVE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP7]]
|
||||
; INTERLEAVE-NEXT: [[TMP6:%.*]] = ashr exact i64 [[SEXT2]], 32
|
||||
; INTERLEAVE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP6]]
|
||||
; INTERLEAVE-NEXT: store i32 [[TRUNC_IV]], i32* [[ARRAYIDX]], align 4
|
||||
; INTERLEAVE-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
|
||||
; INTERLEAVE-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[K]]
|
||||
|
|
|
@ -21,7 +21,7 @@ define i32 @foo(float* nocapture %a, float* nocapture %b, i32 %n) nounwind uwtab
|
|||
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1, !dbg [[DBG9:![0-9]+]]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64, !dbg [[DBG9]]
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1, !dbg [[DBG9]]
|
||||
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 3, !dbg [[DBG9]]
|
||||
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 4, !dbg [[DBG9]]
|
||||
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]], !dbg [[DBG9]]
|
||||
; CHECK: vector.memcheck:
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = sub i64 [[A1]], [[B2]], !dbg [[DBG9]]
|
||||
|
|
Loading…
Reference in New Issue