From 78b8ce40efeb578534543fcb948cb9db22e5b81f Mon Sep 17 00:00:00 2001 From: Roman Lebedev Date: Sat, 13 Mar 2021 09:12:26 +0300 Subject: [PATCH] Reland [SCEV] Improve modelling for (null) pointer constants This reverts commit 329aeb5db43f5e69df038fb20d2def77fe6f8595, and relands commit 61f006ac655431bd44b9e089f74c73bec0c1a48c. This is a continuation of D89456. As was suggested there, now that SCEV models `PtrToInt`, we can try to improve SCEV's pointer handling. In particular, I believe I will need this in the future to further fix `SCEVAddExpr` operation type handling. This removes special handling of `ConstantPointerNull` from `ScalarEvolution::createSCEV()`, and adds constant folding into `ScalarEvolution::getPtrToIntExpr()`. This way, `null` constants stay as such in SCEVs, but gracefully become zero integers when asked. Reviewed By: Meinersbur Differential Revision: https://reviews.llvm.org/D98147 --- llvm/lib/Analysis/ScalarEvolution.cpp | 24 +-- llvm/test/Analysis/ScalarEvolution/load.ll | 2 +- .../max-backedge-taken-count-guard-info.ll | 12 +- .../ScalarEvolution/scalable-vector.ll | 2 +- .../AMDGPU/splitkit-getsubrangeformask.ll | 14 +- llvm/test/CodeGen/PowerPC/pr43527.ll | 5 +- llvm/test/CodeGen/PowerPC/pr48519.ll | 9 +- llvm/test/CodeGen/PowerPC/sms-phi.ll | 8 +- llvm/test/Other/constant-fold-gep.ll | 10 +- .../IndVarSimplify/2011-11-01-lftrptr.ll | 8 +- .../IndVarSimplify/widen-i32-i8ptr.ll | 9 +- .../2011-10-03-CritEdgeMerge.ll | 16 +- .../AMDGPU/lsr-postinc-pos-addrspace.ll | 162 +++++++++++------- .../X86/eh-insertion-point.ll | 11 +- .../LoopVectorize/X86/cost-model-assert.ll | 103 +++++------ .../LoopVectorize/pointer-induction.ll | 10 +- .../Utils/ScalarEvolutionExpanderTest.cpp | 29 ++-- polly/lib/Analysis/ScopBuilder.cpp | 10 ++ polly/lib/Support/SCEVAffinator.cpp | 11 +- polly/lib/Support/SCEVValidator.cpp | 5 + .../partial_write_impossible_restriction.ll | 15 +- .../CodeGen/scev_looking_through_bitcasts.ll | 8 +- 22 files changed, 276 
insertions(+), 207 deletions(-) diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index 7ffb38467386..c94aca576282 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -1065,15 +1065,23 @@ const SCEV *ScalarEvolution::getPtrToIntExpr(const SCEV *Op, Type *Ty, return getTruncateOrZeroExtend(S, Ty); // If not, is this expression something we can't reduce any further? - if (isa(Op)) { - // Create an explicit cast node. - // We can reuse the existing insert position since if we get here, - // we won't have made any changes which would invalidate it. + if (auto *U = dyn_cast(Op)) { Type *IntPtrTy = getDataLayout().getIntPtrType(Op->getType()); assert(getDataLayout().getTypeSizeInBits(getEffectiveSCEVType( Op->getType())) == getDataLayout().getTypeSizeInBits(IntPtrTy) && "We can only model ptrtoint if SCEV's effective (integer) type is " "sufficiently wide to represent all possible pointer values."); + + // Perform some basic constant folding. If the operand of the ptr2int cast + // is a null pointer, don't create a ptr2int SCEV expression (that will be + // left as-is), but produce a zero constant. + // NOTE: We could handle a more general case, but lack motivational cases. + if (isa(U->getValue())) + return getZero(Ty); + + // Create an explicit cast node. + // We can reuse the existing insert position since if we get here, + // we won't have made any changes which would invalidate it. SCEV *S = new (SCEVAllocator) SCEVPtrToIntExpr(ID.Intern(SCEVAllocator), Op, IntPtrTy); UniqueSCEVs.InsertNode(S, IP); @@ -6366,9 +6374,6 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { return getUnknown(UndefValue::get(V->getType())); } else if (ConstantInt *CI = dyn_cast(V)) return getConstant(CI); - else if (isa(V)) - // FIXME: we shouldn't special-case null pointer constant. - return getZero(V->getType()); else if (GlobalAlias *GA = dyn_cast(V)) return GA->isInterposable() ? 
getUnknown(V) : getSCEV(GA->getAliasee()); else if (!isa(V)) @@ -6708,11 +6713,6 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { Value *Ptr = U->getOperand(0); const SCEV *Op = getSCEV(Ptr); Type *DstIntTy = U->getType(); - // SCEV doesn't have constant pointer expression type, but it supports - // nullptr constant (and only that one), which is modelled in SCEV as a - // zero integer constant. So just skip the ptrtoint cast for constants. - if (isa(Op)) - return getTruncateOrZeroExtend(Op, DstIntTy); Type *PtrTy = Ptr->getType(); Type *IntPtrTy = getDataLayout().getIntPtrType(PtrTy); // But only if effective SCEV (integer) type is wide enough to represent diff --git a/llvm/test/Analysis/ScalarEvolution/load.ll b/llvm/test/Analysis/ScalarEvolution/load.ll index f41d20cc9576..9a9942aae849 100644 --- a/llvm/test/Analysis/ScalarEvolution/load.ll +++ b/llvm/test/Analysis/ScalarEvolution/load.ll @@ -82,7 +82,7 @@ define i32 @test2() nounwind uwtable readonly { ; CHECK-NEXT: %next = getelementptr inbounds %struct.ListNode, %struct.ListNode* %n.01, i64 0, i32 0 ; CHECK-NEXT: --> %n.01 U: full-set S: full-set Exits: @node1 LoopDispositions: { %for.body: Variant } ; CHECK-NEXT: %1 = load %struct.ListNode*, %struct.ListNode** %next, align 8 -; CHECK-NEXT: --> %1 U: full-set S: full-set Exits: 0 LoopDispositions: { %for.body: Variant } +; CHECK-NEXT: --> %1 U: full-set S: full-set Exits: null LoopDispositions: { %for.body: Variant } ; CHECK-NEXT: Determining loop execution counts for: @test2 ; CHECK-NEXT: Loop %for.body: backedge-taken count is 4 ; CHECK-NEXT: Loop %for.body: max backedge-taken count is 4 diff --git a/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info.ll b/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info.ll index 48dc484635a6..98b4dc333c0a 100644 --- a/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info.ll +++ b/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info.ll @@ 
-531,17 +531,17 @@ define void @crash(i8* %ptr) { ; CHECK-LABEL: 'crash' ; CHECK-NEXT: Classifying expressions for: @crash ; CHECK-NEXT: %text.addr.5 = phi i8* [ %incdec.ptr112, %while.cond111 ], [ null, %while.body ] -; CHECK-NEXT: --> {0,+,-1}<%while.cond111> U: full-set S: full-set Exits: <> LoopDispositions: { %while.cond111: Computable, %while.body: Variant } +; CHECK-NEXT: --> {null,+,-1}<%while.cond111> U: full-set S: full-set Exits: <> LoopDispositions: { %while.cond111: Computable, %while.body: Variant } ; CHECK-NEXT: %incdec.ptr112 = getelementptr inbounds i8, i8* %text.addr.5, i64 -1 -; CHECK-NEXT: --> {-1,+,-1}<%while.cond111> U: full-set S: full-set Exits: <> LoopDispositions: { %while.cond111: Computable, %while.body: Variant } +; CHECK-NEXT: --> {(-1 + null),+,-1}<%while.cond111> U: full-set S: full-set Exits: <> LoopDispositions: { %while.cond111: Computable, %while.body: Variant } ; CHECK-NEXT: %lastout.2271 = phi i8* [ %incdec.ptr126, %while.body125 ], [ %ptr, %while.end117 ] -; CHECK-NEXT: --> {%ptr,+,1}<%while.body125> U: full-set S: full-set Exits: {-2,+,-1}<%while.cond111> LoopDispositions: { %while.body125: Computable } +; CHECK-NEXT: --> {%ptr,+,1}<%while.body125> U: full-set S: full-set Exits: {(-2 + null),+,-1}<%while.cond111> LoopDispositions: { %while.body125: Computable } ; CHECK-NEXT: %incdec.ptr126 = getelementptr inbounds i8, i8* %lastout.2271, i64 1 -; CHECK-NEXT: --> {(1 + %ptr),+,1}<%while.body125> U: [1,0) S: [1,0) Exits: {-1,+,-1}<%while.cond111> LoopDispositions: { %while.body125: Computable } +; CHECK-NEXT: --> {(1 + %ptr),+,1}<%while.body125> U: [1,0) S: [1,0) Exits: {(-1 + null),+,-1}<%while.cond111> LoopDispositions: { %while.body125: Computable } ; CHECK-NEXT: Determining loop execution counts for: @crash -; CHECK-NEXT: Loop %while.body125: backedge-taken count is {(-2 + (-1 * %ptr)),+,-1}<%while.cond111> +; CHECK-NEXT: Loop %while.body125: backedge-taken count is {(-2 + (-1 * %ptr) + null),+,-1}<%while.cond111> ; 
CHECK-NEXT: Loop %while.body125: max backedge-taken count is -1 -; CHECK-NEXT: Loop %while.body125: Predicated backedge-taken count is {(-2 + (-1 * %ptr)),+,-1}<%while.cond111> +; CHECK-NEXT: Loop %while.body125: Predicated backedge-taken count is {(-2 + (-1 * %ptr) + null),+,-1}<%while.cond111> ; CHECK-NEXT: Predicates: ; CHECK: Loop %while.body125: Trip multiple is 1 ; CHECK-NEXT: Loop %while.cond111: Unpredictable backedge-taken count. diff --git a/llvm/test/Analysis/ScalarEvolution/scalable-vector.ll b/llvm/test/Analysis/ScalarEvolution/scalable-vector.ll index 135d3305061e..ac11de6f3581 100644 --- a/llvm/test/Analysis/ScalarEvolution/scalable-vector.ll +++ b/llvm/test/Analysis/ScalarEvolution/scalable-vector.ll @@ -6,7 +6,7 @@ define void @a( *%p) { ; CHECK-LABEL: 'a' ; CHECK-NEXT: Classifying expressions for: @a ; CHECK-NEXT: %1 = getelementptr , * null, i32 3 -; CHECK-NEXT: --> (3 * sizeof()) U: [0,-15) S: [-9223372036854775808,9223372036854775793) +; CHECK-NEXT: --> ((3 * sizeof()) + null) U: [0,-15) S: [-9223372036854775808,9223372036854775793) ; CHECK-NEXT: %2 = getelementptr , * %p, i32 1 ; CHECK-NEXT: --> (sizeof() + %p) U: full-set S: full-set ; CHECK-NEXT: Determining loop execution counts for: @a diff --git a/llvm/test/CodeGen/AMDGPU/splitkit-getsubrangeformask.ll b/llvm/test/CodeGen/AMDGPU/splitkit-getsubrangeformask.ll index f5d783134184..3543c143838a 100644 --- a/llvm/test/CodeGen/AMDGPU/splitkit-getsubrangeformask.ll +++ b/llvm/test/CodeGen/AMDGPU/splitkit-getsubrangeformask.ll @@ -185,7 +185,7 @@ define amdgpu_gs void @_amdgpu_gs_main(i32 inreg %primShaderTableAddrLow, <31 x ; CHECK: [[S_ADD_I32_15:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM4]], -467, implicit-def dead $scc ; CHECK: undef %453.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_4]], [[S_LSHL_B32_6]], implicit-def $scc ; CHECK: %453.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_4]], [[S_ASHR_I32_6]], implicit-def dead $scc, implicit $scc - ; CHECK: %71.sub0_sub1:sgpr_128 = 
S_LOAD_DWORDX2_IMM %453, 0, 0, 0 :: (load 8 from %ir.304, addrspace 4) + ; CHECK: %71.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM %453, 0, 0, 0 :: (load 8 from %ir.308, addrspace 4) ; CHECK: [[BUFFER_LOAD_DWORD_OFFSET3:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[S_LOAD_DWORDX4_IMM18]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4) ; CHECK: [[S_LOAD_DWORDX4_IMM19:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %327, 0, 0, 0 :: (load 16 from %ir.223, addrspace 4) ; CHECK: [[S_LOAD_DWORDX4_IMM20:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %335, 0, 0, 0 :: (load 16 from %ir.230, addrspace 4) @@ -202,16 +202,16 @@ define amdgpu_gs void @_amdgpu_gs_main(i32 inreg %primShaderTableAddrLow, <31 x ; CHECK: [[S_ADD_I32_16:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM5]], -468, implicit-def dead $scc ; CHECK: undef %468.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_4]], [[S_LSHL_B32_7]], implicit-def $scc ; CHECK: %468.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_4]], [[S_ASHR_I32_7]], implicit-def dead $scc, implicit $scc - ; CHECK: %71.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM %468, 0, 0, 0 :: (load 8 from %ir.316, addrspace 4) + ; CHECK: %71.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM %468, 0, 0, 0 :: (load 8 from %ir.320, addrspace 4) ; CHECK: %71.sub1:sgpr_128 = S_AND_B32 %71.sub1, [[S_MOV_B32_]], implicit-def dead $scc ; CHECK: [[S_BUFFER_LOAD_DWORD_IMM6:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %71, 0, 0, 0 :: (dereferenceable invariant load 4) - ; CHECK: [[S_LOAD_DWORDX4_IMM23:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %411, 0, 0, 0 :: (load 16 from %ir.278, addrspace 4) + ; CHECK: [[S_LOAD_DWORDX4_IMM23:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %411, 0, 0, 0 :: (load 16 from %ir.282, addrspace 4) ; CHECK: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM undef %488:sreg_64, 0, 0, 0 :: (load 4 from `i32 addrspace(4)* undef`, addrspace 4) ; CHECK: KILL %411.sub0, %411.sub1 ; CHECK: KILL undef 
%488:sreg_64 ; CHECK: KILL %71.sub0_sub1 ; CHECK: [[S_LSHL_B32_8:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY11]], 3, implicit-def dead $scc - ; CHECK: [[S_LOAD_DWORDX4_IMM24:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %425, 0, 0, 0 :: (load 16 from %ir.287, addrspace 4) + ; CHECK: [[S_LOAD_DWORDX4_IMM24:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %425, 0, 0, 0 :: (load 16 from %ir.291, addrspace 4) ; CHECK: [[S_ASHR_I32_8:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_8]], 31, implicit-def dead $scc ; CHECK: [[S_ADD_I32_17:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM6]], -469, implicit-def dead $scc ; CHECK: undef %485.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_4]], [[S_LSHL_B32_8]], implicit-def $scc @@ -234,13 +234,13 @@ define amdgpu_gs void @_amdgpu_gs_main(i32 inreg %primShaderTableAddrLow, <31 x ; CHECK: [[S_ADDC_U32_5:%[0-9]+]]:sreg_32 = S_ADDC_U32 undef %33:sreg_32, 0, implicit-def dead $scc, implicit $scc ; CHECK: undef %514.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_5]], [[S_LSHL_B32_]], implicit-def $scc ; CHECK: %514.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_5]], [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc - ; CHECK: [[S_LOAD_DWORDX4_IMM25:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %514, 0, 0, 0 :: (load 16 from %ir.347, addrspace 4) + ; CHECK: [[S_LOAD_DWORDX4_IMM25:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %514, 0, 0, 0 :: (load 16 from %ir.351, addrspace 4) ; CHECK: undef %522.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_5]], [[S_LSHL_B32_1]], implicit-def $scc ; CHECK: %522.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_5]], [[S_ASHR_I32_1]], implicit-def dead $scc, implicit $scc - ; CHECK: [[S_LOAD_DWORDX4_IMM26:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %522, 0, 0, 0 :: (load 16 from %ir.353, addrspace 4) + ; CHECK: [[S_LOAD_DWORDX4_IMM26:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %522, 0, 0, 0 :: (load 16 from %ir.357, addrspace 4) ; CHECK: undef %530.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_5]], [[S_LSHL_B32_2]], implicit-def $scc ; CHECK: %530.sub1:sreg_64 = S_ADDC_U32 
[[S_ADDC_U32_5]], [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc - ; CHECK: [[S_LOAD_DWORDX4_IMM27:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %530, 0, 0, 0 :: (load 16 from %ir.359, addrspace 4) + ; CHECK: [[S_LOAD_DWORDX4_IMM27:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %530, 0, 0, 0 :: (load 16 from %ir.363, addrspace 4) ; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN23:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM25]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4) ; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN24:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM26]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4) ; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN25:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM27]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4) diff --git a/llvm/test/CodeGen/PowerPC/pr43527.ll b/llvm/test/CodeGen/PowerPC/pr43527.ll index 023898a46cc1..0a03c3f7112f 100644 --- a/llvm/test/CodeGen/PowerPC/pr43527.ll +++ b/llvm/test/CodeGen/PowerPC/pr43527.ll @@ -19,15 +19,14 @@ define dso_local void @test(i64 %arg, i64 %arg1) { ; CHECK-NEXT: std r0, 16(r1) ; CHECK-NEXT: stdu r1, -64(r1) ; CHECK-NEXT: sub r30, r4, r3 -; CHECK-NEXT: li r29, 0 +; CHECK-NEXT: li r29, -4 ; CHECK-NEXT: .p2align 5 ; CHECK-NEXT: .LBB0_3: # %bb5 ; CHECK-NEXT: # -; CHECK-NEXT: lfsx f1, 0, r29 +; CHECK-NEXT: lfsu f1, 4(r29) ; CHECK-NEXT: bl lrint ; CHECK-NEXT: nop ; CHECK-NEXT: addi r30, r30, -1 -; CHECK-NEXT: addi r29, r29, 4 ; CHECK-NEXT: cmpldi r30, 0 ; CHECK-NEXT: bne cr0, .LBB0_3 ; CHECK-NEXT: # %bb.4: # %bb15 diff --git a/llvm/test/CodeGen/PowerPC/pr48519.ll b/llvm/test/CodeGen/PowerPC/pr48519.ll index 035cc49b93e6..3552c049e321 100644 --- 
a/llvm/test/CodeGen/PowerPC/pr48519.ll +++ b/llvm/test/CodeGen/PowerPC/pr48519.ll @@ -265,16 +265,17 @@ define void @func_48785(half %arg) #0 { ; CHECK-NEXT: stdu r1, -64(r1) ; CHECK-NEXT: fmr f31, f1 ; CHECK-NEXT: li r30, 0 +; CHECK-NEXT: li r29, 0 ; CHECK-NEXT: .p2align 5 ; CHECK-NEXT: .LBB3_1: # %bb1 ; CHECK-NEXT: # ; CHECK-NEXT: fmr f1, f31 -; CHECK-NEXT: sldi r29, r30, 1 ; CHECK-NEXT: bl __gnu_f2h_ieee ; CHECK-NEXT: nop -; CHECK-NEXT: addi r30, r30, 12 -; CHECK-NEXT: sth r3, 0(r29) -; CHECK-NEXT: cmpldi r30, 0 +; CHECK-NEXT: addi r29, r29, -12 +; CHECK-NEXT: sth r3, 0(r30) +; CHECK-NEXT: addi r30, r30, 24 +; CHECK-NEXT: cmpldi r29, 0 ; CHECK-NEXT: bne+ cr0, .LBB3_1 ; CHECK-NEXT: # %bb.2: # %bb5 ; diff --git a/llvm/test/CodeGen/PowerPC/sms-phi.ll b/llvm/test/CodeGen/PowerPC/sms-phi.ll index 93975663f1bc..3ddf78157d71 100644 --- a/llvm/test/CodeGen/PowerPC/sms-phi.ll +++ b/llvm/test/CodeGen/PowerPC/sms-phi.ll @@ -4,11 +4,11 @@ ; RUN: >/dev/null | FileCheck %s define dso_local void @sha512() #0 { ;CHECK: prolog: -;CHECK: %16:g8rc = ADD8 %21:g8rc, %20:g8rc +;CHECK: %18:g8rc = ADD8 %24:g8rc, %23:g8rc ;CHECK: epilog: -;CHECK: %23:g8rc_and_g8rc_nox0 = PHI %5:g8rc_and_g8rc_nox0, %bb.3, %18:g8rc_and_g8rc_nox0, %bb.4 -;CHECK-NEXT: %24:g8rc = PHI %6:g8rc, %bb.3, %16:g8rc, %bb.4 -;CHECK-NEXT: %25:g8rc = PHI %6:g8rc, %bb.3, %19:g8rc, %bb.4 +;CHECK: %28:g8rc_and_g8rc_nox0 = PHI %6:g8rc_and_g8rc_nox0, %bb.3, %22:g8rc_and_g8rc_nox0, %bb.4 +;CHECK-NEXT: %29:g8rc = PHI %12:g8rc, %bb.3, %16:g8rc, %bb.4 +;CHECK-NEXT: %30:g8rc = PHI %15:g8rc, %bb.3, %19:g8rc, %bb.4 br label %1 1: ; preds = %1, %0 diff --git a/llvm/test/Other/constant-fold-gep.ll b/llvm/test/Other/constant-fold-gep.ll index ec39c3229b2d..7d3736b89b49 100644 --- a/llvm/test/Other/constant-fold-gep.ll +++ b/llvm/test/Other/constant-fold-gep.ll @@ -192,9 +192,9 @@ ; SCEV: %t = bitcast i1* getelementptr (i1, i1* inttoptr (i32 1 to i1*), i32 -2) to i1* ; SCEV: --> (-2 + inttoptr (i32 1 to i1*)) ; SCEV: Classifying 
expressions for: @hoo8 -; SCEV: --> -1 +; SCEV: --> (-1 + null) U: [-1,0) S: [-1,0) ; SCEV: Classifying expressions for: @hoo1 -; SCEV: --> -1 +; SCEV: --> (-1 + null) U: [-1,0) S: [-1,0) define i8* @goo8() nounwind { %t = bitcast i8* getelementptr (i8, i8* inttoptr (i32 1 to i8*), i32 -1) to i8* @@ -408,13 +408,13 @@ define i64 @fi() nounwind { ; TO: } ; SCEV: Classifying expressions for: @fM ; SCEV: %t = bitcast i64* getelementptr (i64, i64* null, i32 1) to i64* -; SCEV: --> 8 +; SCEV: --> (8 + null) U: [8,9) S: [8,9) ; SCEV: Classifying expressions for: @fN ; SCEV: %t = bitcast i64* getelementptr ({ i64, i64 }, { i64, i64 }* null, i32 0, i32 1) to i64* -; SCEV: --> 8 +; SCEV: --> (8 + null) U: [8,9) S: [8,9) ; SCEV: Classifying expressions for: @fO ; SCEV: %t = bitcast i64* getelementptr ([2 x i64], [2 x i64]* null, i32 0, i32 1) to i64* -; SCEV: --> 8 +; SCEV: --> (8 + null) U: [8,9) S: [8,9) define i64* @fM() nounwind { %t = bitcast i64* getelementptr (i64, i64* null, i32 1) to i64* diff --git a/llvm/test/Transforms/IndVarSimplify/2011-11-01-lftrptr.ll b/llvm/test/Transforms/IndVarSimplify/2011-11-01-lftrptr.ll index e154cc38b117..5c0daa0784f7 100644 --- a/llvm/test/Transforms/IndVarSimplify/2011-11-01-lftrptr.ll +++ b/llvm/test/Transforms/IndVarSimplify/2011-11-01-lftrptr.ll @@ -150,13 +150,13 @@ define i8 @testnullptrint(i8* %buf, i8* %end) nounwind { ; PTR64-NEXT: [[TMP2:%.*]] = sub i32 [[TMP1]], [[BI]] ; PTR64-NEXT: [[TMP3:%.*]] = zext i32 [[TMP2]] to i64 ; PTR64-NEXT: [[TMP4:%.*]] = add nuw nsw i64 [[TMP3]], 1 -; PTR64-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to i8* +; PTR64-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, i8* null, i64 [[TMP4]] ; PTR64-NEXT: br label [[LOOP:%.*]] ; PTR64: loop: ; PTR64-NEXT: [[P_01_US_US:%.*]] = phi i8* [ null, [[PREHEADER]] ], [ [[GEP:%.*]], [[LOOP]] ] ; PTR64-NEXT: [[GEP]] = getelementptr inbounds i8, i8* [[P_01_US_US]], i64 1 ; PTR64-NEXT: [[SNEXT:%.*]] = load i8, i8* [[GEP]], align 1 -; PTR64-NEXT: [[EXITCOND:%.*]] = icmp 
ne i8* [[GEP]], [[TMP5]] +; PTR64-NEXT: [[EXITCOND:%.*]] = icmp ne i8* [[GEP]], [[SCEVGEP]] ; PTR64-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT_LOOPEXIT:%.*]] ; PTR64: exit.loopexit: ; PTR64-NEXT: [[SNEXT_LCSSA:%.*]] = phi i8 [ [[SNEXT]], [[LOOP]] ] @@ -171,16 +171,16 @@ define i8 @testnullptrint(i8* %buf, i8* %end) nounwind { ; PTR32-NEXT: [[BI:%.*]] = ptrtoint i8* [[BUF:%.*]] to i32 ; PTR32-NEXT: [[EI:%.*]] = ptrtoint i8* [[END:%.*]] to i32 ; PTR32-NEXT: [[CNT:%.*]] = sub i32 [[EI]], [[BI]] -; PTR32-NEXT: [[CNT1:%.*]] = inttoptr i32 [[CNT]] to i8* ; PTR32-NEXT: [[GUARD:%.*]] = icmp ult i32 0, [[CNT]] ; PTR32-NEXT: br i1 [[GUARD]], label [[PREHEADER:%.*]], label [[EXIT:%.*]] ; PTR32: preheader: +; PTR32-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, i8* null, i32 [[CNT]] ; PTR32-NEXT: br label [[LOOP:%.*]] ; PTR32: loop: ; PTR32-NEXT: [[P_01_US_US:%.*]] = phi i8* [ null, [[PREHEADER]] ], [ [[GEP:%.*]], [[LOOP]] ] ; PTR32-NEXT: [[GEP]] = getelementptr inbounds i8, i8* [[P_01_US_US]], i64 1 ; PTR32-NEXT: [[SNEXT:%.*]] = load i8, i8* [[GEP]], align 1 -; PTR32-NEXT: [[EXITCOND:%.*]] = icmp ne i8* [[GEP]], [[CNT1]] +; PTR32-NEXT: [[EXITCOND:%.*]] = icmp ne i8* [[GEP]], [[SCEVGEP]] ; PTR32-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT_LOOPEXIT:%.*]] ; PTR32: exit.loopexit: ; PTR32-NEXT: [[SNEXT_LCSSA:%.*]] = phi i8 [ [[SNEXT]], [[LOOP]] ] diff --git a/llvm/test/Transforms/IndVarSimplify/widen-i32-i8ptr.ll b/llvm/test/Transforms/IndVarSimplify/widen-i32-i8ptr.ll index 7b413b7f96bf..03b0e2649b28 100644 --- a/llvm/test/Transforms/IndVarSimplify/widen-i32-i8ptr.ll +++ b/llvm/test/Transforms/IndVarSimplify/widen-i32-i8ptr.ll @@ -11,13 +11,12 @@ define dso_local void @Widen_i32_i8ptr() local_unnamed_addr { ; CHECK-NEXT: store i8** [[ARRAYDECAY2032]], i8*** inttoptr (i64 8 to i8***), align 8 ; CHECK-NEXT: br label [[FOR_COND2106:%.*]] ; CHECK: for.cond2106: -; CHECK-NEXT: [[GID_0:%.*]] = phi i8* [ null, [[ENTRY:%.*]] ], [ [[INCDEC_PTR:%.*]], [[FOR_COND2106]] ] 
-; CHECK-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC2117:%.*]], [[FOR_COND2106]] ] +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_COND2106]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[GID_0:%.*]] = phi i8* [ null, [[ENTRY]] ], [ [[INCDEC_PTR:%.*]], [[FOR_COND2106]] ] ; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i8, i8* [[GID_0]], i64 1 -; CHECK-NEXT: [[IDXPROM2114:%.*]] = zext i32 [[I_0]] to i64 -; CHECK-NEXT: [[ARRAYIDX2115:%.*]] = getelementptr inbounds [15 x i8*], [15 x i8*]* [[PTRIDS]], i64 0, i64 [[IDXPROM2114]] +; CHECK-NEXT: [[ARRAYIDX2115:%.*]] = getelementptr inbounds [15 x i8*], [15 x i8*]* [[PTRIDS]], i64 0, i64 [[INDVARS_IV]] ; CHECK-NEXT: store i8* [[GID_0]], i8** [[ARRAYIDX2115]], align 8 -; CHECK-NEXT: [[INC2117]] = add nuw nsw i32 [[I_0]], 1 +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: br label [[FOR_COND2106]] ; entry: diff --git a/llvm/test/Transforms/LoopStrengthReduce/2011-10-03-CritEdgeMerge.ll b/llvm/test/Transforms/LoopStrengthReduce/2011-10-03-CritEdgeMerge.ll index d851499e25dc..58ffe68efffb 100644 --- a/llvm/test/Transforms/LoopStrengthReduce/2011-10-03-CritEdgeMerge.ll +++ b/llvm/test/Transforms/LoopStrengthReduce/2011-10-03-CritEdgeMerge.ll @@ -16,9 +16,8 @@ define i8* @test1() { ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[LSR_IV_NEXT]] = add nuw nsw i64 [[LSR_IV]], 1 -; CHECK-NEXT: [[LSR_IV_NEXT1:%.*]] = inttoptr i64 [[LSR_IV_NEXT]] to i8* +; CHECK-NEXT: [[LSR_IV:%.*]] = phi i8* [ [[SCEVGEP:%.*]], [[LOOP]] ], [ null, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[SCEVGEP]] = getelementptr i8, i8* [[LSR_IV]], i64 1 ; CHECK-NEXT: br i1 false, label [[LOOP]], label [[LOOPEXIT:%.*]] ; CHECK: loopexit: ; CHECK-NEXT: br i1 false, label [[BBA:%.*]], label [[BBB:%.*]] @@ -37,7 +36,7 @@ define i8* @test1() { ; CHECK: bbB.bb89_crit_edge: ; 
CHECK-NEXT: br label [[BB89]] ; CHECK: bb89: -; CHECK-NEXT: [[TMP75PHI:%.*]] = phi i8* [ [[LSR_IV_NEXT1]], [[BBA_BB89_CRIT_EDGE]] ], [ [[LSR_IV_NEXT1]], [[BBB_BB89_CRIT_EDGE]] ] +; CHECK-NEXT: [[TMP75PHI:%.*]] = phi i8* [ [[SCEVGEP]], [[BBA_BB89_CRIT_EDGE]] ], [ [[SCEVGEP]], [[BBB_BB89_CRIT_EDGE]] ] ; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: exit: ; CHECK-NEXT: ret i8* [[TMP75PHI]] @@ -81,9 +80,8 @@ define i8* @test2() { ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[LSR_IV_NEXT]] = add nuw nsw i64 [[LSR_IV]], 1 -; CHECK-NEXT: [[LSR_IV_NEXT1:%.*]] = inttoptr i64 [[LSR_IV_NEXT]] to i8* +; CHECK-NEXT: [[LSR_IV:%.*]] = phi i8* [ [[SCEVGEP:%.*]], [[LOOP]] ], [ null, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[SCEVGEP]] = getelementptr i8, i8* [[LSR_IV]], i64 1 ; CHECK-NEXT: br i1 false, label [[LOOP]], label [[LOOPEXIT:%.*]] ; CHECK: loopexit: ; CHECK-NEXT: br i1 false, label [[BBA:%.*]], label [[BBB:%.*]] @@ -100,10 +98,10 @@ define i8* @test2() { ; CHECK: bbB.exit_crit_edge: ; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: bb89: -; CHECK-NEXT: [[TMP75PHI:%.*]] = phi i8* [ [[LSR_IV_NEXT1]], [[BBA]] ], [ [[LSR_IV_NEXT1]], [[BBA]] ], [ [[LSR_IV_NEXT1]], [[BBA]] ] +; CHECK-NEXT: [[TMP75PHI:%.*]] = phi i8* [ [[SCEVGEP]], [[BBA]] ], [ [[SCEVGEP]], [[BBA]] ], [ [[SCEVGEP]], [[BBA]] ] ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: -; CHECK-NEXT: [[RESULT:%.*]] = phi i8* [ [[TMP75PHI]], [[BB89]] ], [ [[LSR_IV_NEXT1]], [[BBB_EXIT_CRIT_EDGE]] ] +; CHECK-NEXT: [[RESULT:%.*]] = phi i8* [ [[TMP75PHI]], [[BB89]] ], [ [[SCEVGEP]], [[BBB_EXIT_CRIT_EDGE]] ] ; CHECK-NEXT: ret i8* [[RESULT]] ; entry: diff --git a/llvm/test/Transforms/LoopStrengthReduce/AMDGPU/lsr-postinc-pos-addrspace.ll b/llvm/test/Transforms/LoopStrengthReduce/AMDGPU/lsr-postinc-pos-addrspace.ll index fdbd0dada2c8..b77f2e9de7a7 100644 --- 
a/llvm/test/Transforms/LoopStrengthReduce/AMDGPU/lsr-postinc-pos-addrspace.ll +++ b/llvm/test/Transforms/LoopStrengthReduce/AMDGPU/lsr-postinc-pos-addrspace.ll @@ -1,131 +1,177 @@ -; RUN: llc -march=amdgcn -mcpu=bonaire -print-lsr-output < %s 2>&1 | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -march=amdgcn -mcpu=bonaire -loop-reduce -S < %s | FileCheck %s ; Test various conditions where OptimizeLoopTermCond doesn't look at a ; memory instruction use and fails to find the address space. target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5" -; CHECK-LABEL: @local_cmp_user( -; CHECK: bb11: -; CHECK: %lsr.iv1 = phi i32 [ %lsr.iv.next2, %bb ], [ 2, %entry ] -; CHECK: %lsr.iv = phi i32 [ %lsr.iv.next, %bb ], [ %{{[0-9]+}}, %entry ] -; CHECK: %lsr.iv.next = add i32 %lsr.iv, -1 -; CHECK: %lsr.iv.next2 = add i32 %lsr.iv1, -2 -; CHECK: br i1 - -; CHECK: bb: -; CHECK: inttoptr i32 %lsr.iv.next2 to i8 addrspace(3)* -; CHECK: %c1 = icmp ne i8 addrspace(3)* define amdgpu_kernel void @local_cmp_user(i32 %arg0) nounwind { +; CHECK-LABEL: @local_cmp_user( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[ARG0:%.*]], 1 +; CHECK-NEXT: br label [[BB11:%.*]] +; CHECK: bb11: +; CHECK-NEXT: [[LSR_IV2:%.*]] = phi i32 [ [[LSR_IV_NEXT3:%.*]], [[BB:%.*]] ], [ -2, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[LSR_IV:%.*]] = phi i32 [ [[LSR_IV_NEXT:%.*]], [[BB]] ], [ [[TMP0]], [[ENTRY]] ] +; CHECK-NEXT: [[LSR_IV_NEXT]] = add i32 [[LSR_IV]], -1 +; CHECK-NEXT: [[LSR_IV_NEXT3]] = add i32 [[LSR_IV2]], 2 +; CHECK-NEXT: [[C0:%.*]] = icmp eq i32 [[LSR_IV_NEXT]], 0 +; CHECK-NEXT: br i1 [[C0]], label [[BB13:%.*]], label [[BB]] +; CHECK: bb: +; CHECK-NEXT: [[T:%.*]] = load i8 addrspace(3)*, i8 addrspace(3)* addrspace(3)* undef, align 4 +; CHECK-NEXT: [[T1:%.*]] = ptrtoint i8 addrspace(3)* [[T]] to i32 +; 
CHECK-NEXT: [[TMP1:%.*]] = sub i32 0, [[T1]] +; CHECK-NEXT: [[TMP2:%.*]] = inttoptr i32 [[TMP1]] to i8 addrspace(3)* +; CHECK-NEXT: [[TMP:%.*]] = inttoptr i32 [[LSR_IV_NEXT3]] to i8 addrspace(3)* +; CHECK-NEXT: [[C1:%.*]] = icmp ne i8 addrspace(3)* [[TMP2]], [[TMP]] +; CHECK-NEXT: br i1 [[C1]], label [[BB11]], label [[BB13]] +; CHECK: bb13: +; CHECK-NEXT: unreachable +; entry: br label %bb11 -bb11: +bb11: ; preds = %bb, %entry %i = phi i32 [ 0, %entry ], [ %i.next, %bb ] %ii = shl i32 %i, 1 %c0 = icmp eq i32 %i, %arg0 br i1 %c0, label %bb13, label %bb -bb: - %t = load i8 addrspace(3)*, i8 addrspace(3)* addrspace(3)* undef +bb: ; preds = %bb11 + %t = load i8 addrspace(3)*, i8 addrspace(3)* addrspace(3)* undef, align 4 %p = getelementptr i8, i8 addrspace(3)* %t, i32 %ii %c1 = icmp ne i8 addrspace(3)* %p, null %i.next = add i32 %i, 1 br i1 %c1, label %bb11, label %bb13 -bb13: +bb13: ; preds = %bb, %bb11 unreachable } -; CHECK-LABEL: @global_cmp_user( -; CHECK: %lsr.iv1 = phi i64 -; CHECK: %lsr.iv = phi i64 -; CHECK: %lsr.iv.next = add i64 %lsr.iv, -1 -; CHECK: %lsr.iv.next2 = add i64 %lsr.iv1, -2 -; CHECK: br i1 - -; CHECK: bb: -; CHECK: inttoptr i64 %lsr.iv.next2 to i8 addrspace(1)* -; CHECK: icmp ne i8 addrspace(1)* %t define amdgpu_kernel void @global_cmp_user(i64 %arg0) nounwind { +; CHECK-LABEL: @global_cmp_user( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[ARG0:%.*]], 1 +; CHECK-NEXT: br label [[BB11:%.*]] +; CHECK: bb11: +; CHECK-NEXT: [[LSR_IV2:%.*]] = phi i64 [ [[LSR_IV_NEXT3:%.*]], [[BB:%.*]] ], [ -2, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[BB]] ], [ [[TMP0]], [[ENTRY]] ] +; CHECK-NEXT: [[LSR_IV_NEXT]] = add i64 [[LSR_IV]], -1 +; CHECK-NEXT: [[LSR_IV_NEXT3]] = add i64 [[LSR_IV2]], 2 +; CHECK-NEXT: [[C0:%.*]] = icmp eq i64 [[LSR_IV_NEXT]], 0 +; CHECK-NEXT: br i1 [[C0]], label [[BB13:%.*]], label [[BB]] +; CHECK: bb: +; CHECK-NEXT: [[T:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* undef, 
align 8 +; CHECK-NEXT: [[T1:%.*]] = ptrtoint i8 addrspace(1)* [[T]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = sub i64 0, [[T1]] +; CHECK-NEXT: [[TMP2:%.*]] = inttoptr i64 [[TMP1]] to i8 addrspace(1)* +; CHECK-NEXT: [[TMP:%.*]] = inttoptr i64 [[LSR_IV_NEXT3]] to i8 addrspace(1)* +; CHECK-NEXT: [[C1:%.*]] = icmp ne i8 addrspace(1)* [[TMP2]], [[TMP]] +; CHECK-NEXT: br i1 [[C1]], label [[BB11]], label [[BB13]] +; CHECK: bb13: +; CHECK-NEXT: unreachable +; entry: br label %bb11 -bb11: +bb11: ; preds = %bb, %entry %i = phi i64 [ 0, %entry ], [ %i.next, %bb ] %ii = shl i64 %i, 1 %c0 = icmp eq i64 %i, %arg0 br i1 %c0, label %bb13, label %bb -bb: - %t = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* undef +bb: ; preds = %bb11 + %t = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* undef, align 8 %p = getelementptr i8, i8 addrspace(1)* %t, i64 %ii %c1 = icmp ne i8 addrspace(1)* %p, null %i.next = add i64 %i, 1 br i1 %c1, label %bb11, label %bb13 -bb13: +bb13: ; preds = %bb, %bb11 unreachable } -; CHECK-LABEL: @global_gep_user( -; CHECK: %lsr.iv1 = phi i32 [ %lsr.iv.next2, %bb ], [ 0, %entry ] -; CHECK: %lsr.iv = phi i32 [ %lsr.iv.next, %bb ], [ %{{[0-9]+}}, %entry ] -; CHECK: %lsr.iv.next = add i32 %lsr.iv, -1 -; CHECK: %lsr.iv.next2 = add i32 %lsr.iv1, 2 -; CHECK: br i1 - -; CHECK: bb: -; CHECK: %idxprom = sext i32 %lsr.iv1 to i64 -; CHECK: getelementptr i8, i8 addrspace(1)* %t, i64 %idxprom define amdgpu_kernel void @global_gep_user(i32 %arg0) nounwind { +; CHECK-LABEL: @global_gep_user( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[ARG0:%.*]], 1 +; CHECK-NEXT: br label [[BB11:%.*]] +; CHECK: bb11: +; CHECK-NEXT: [[LSR_IV1:%.*]] = phi i32 [ [[LSR_IV_NEXT2:%.*]], [[BB:%.*]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[LSR_IV:%.*]] = phi i32 [ [[LSR_IV_NEXT:%.*]], [[BB]] ], [ [[TMP0]], [[ENTRY]] ] +; CHECK-NEXT: [[LSR_IV_NEXT]] = add i32 [[LSR_IV]], -1 +; CHECK-NEXT: [[LSR_IV_NEXT2]] = add i32 [[LSR_IV1]], 2 +; CHECK-NEXT: [[C0:%.*]] = icmp eq i32 
[[LSR_IV_NEXT]], 0 +; CHECK-NEXT: br i1 [[C0]], label [[BB13:%.*]], label [[BB]] +; CHECK: bb: +; CHECK-NEXT: [[T:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* undef, align 8 +; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[LSR_IV1]] to i64 +; CHECK-NEXT: [[P:%.*]] = getelementptr i8, i8 addrspace(1)* [[T]], i64 [[IDXPROM]] +; CHECK-NEXT: [[C1:%.*]] = icmp ne i8 addrspace(1)* [[P]], null +; CHECK-NEXT: br i1 [[C1]], label [[BB11]], label [[BB13]] +; CHECK: bb13: +; CHECK-NEXT: unreachable +; entry: br label %bb11 -bb11: +bb11: ; preds = %bb, %entry %i = phi i32 [ 0, %entry ], [ %i.next, %bb ] %ii = shl i32 %i, 1 %c0 = icmp eq i32 %i, %arg0 br i1 %c0, label %bb13, label %bb -bb: - %t = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* undef - %p = getelementptr i8, i8 addrspace(1)* %t, i32 %ii +bb: ; preds = %bb11 + %t = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* undef, align 8 + %idxprom = sext i32 %ii to i64 + %p = getelementptr i8, i8 addrspace(1)* %t, i64 %idxprom %c1 = icmp ne i8 addrspace(1)* %p, null %i.next = add i32 %i, 1 br i1 %c1, label %bb11, label %bb13 -bb13: +bb13: ; preds = %bb, %bb11 unreachable } -; CHECK-LABEL: @global_sext_scale_user( -; CHECK: %lsr.iv1 = phi i32 [ %lsr.iv.next2, %bb ], [ 0, %entry ] -; CHECK: %lsr.iv = phi i32 [ %lsr.iv.next, %bb ], [ %{{[0-9]+}}, %entry ] -; CHECK: %lsr.iv.next = add i32 %lsr.iv, -1 -; CHECK: %lsr.iv.next2 = add i32 %lsr.iv1, 2 -; CHECK: br i1 - -; CHECK: bb -; CHECK: %p = getelementptr i8, i8 addrspace(1)* %t, i64 %ii.ext define amdgpu_kernel void @global_sext_scale_user(i32 %arg0) nounwind { +; CHECK-LABEL: @global_sext_scale_user( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[ARG0:%.*]], 1 +; CHECK-NEXT: br label [[BB11:%.*]] +; CHECK: bb11: +; CHECK-NEXT: [[LSR_IV1:%.*]] = phi i32 [ [[LSR_IV_NEXT2:%.*]], [[BB:%.*]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[LSR_IV:%.*]] = phi i32 [ [[LSR_IV_NEXT:%.*]], [[BB]] ], [ [[TMP0]], [[ENTRY]] ] +; CHECK-NEXT: [[II_EXT:%.*]] = 
sext i32 [[LSR_IV1]] to i64 +; CHECK-NEXT: [[LSR_IV_NEXT]] = add i32 [[LSR_IV]], -1 +; CHECK-NEXT: [[LSR_IV_NEXT2]] = add i32 [[LSR_IV1]], 2 +; CHECK-NEXT: [[C0:%.*]] = icmp eq i32 [[LSR_IV_NEXT]], 0 +; CHECK-NEXT: br i1 [[C0]], label [[BB13:%.*]], label [[BB]] +; CHECK: bb: +; CHECK-NEXT: [[T:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* undef, align 8 +; CHECK-NEXT: [[P:%.*]] = getelementptr i8, i8 addrspace(1)* [[T]], i64 [[II_EXT]] +; CHECK-NEXT: [[C1:%.*]] = icmp ne i8 addrspace(1)* [[P]], null +; CHECK-NEXT: br i1 [[C1]], label [[BB11]], label [[BB13]] +; CHECK: bb13: +; CHECK-NEXT: unreachable +; entry: br label %bb11 -bb11: +bb11: ; preds = %bb, %entry %i = phi i32 [ 0, %entry ], [ %i.next, %bb ] %ii = shl i32 %i, 1 %ii.ext = sext i32 %ii to i64 %c0 = icmp eq i32 %i, %arg0 br i1 %c0, label %bb13, label %bb -bb: - %t = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* undef +bb: ; preds = %bb11 + %t = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* undef, align 8 %p = getelementptr i8, i8 addrspace(1)* %t, i64 %ii.ext %c1 = icmp ne i8 addrspace(1)* %p, null %i.next = add i32 %i, 1 br i1 %c1, label %bb11, label %bb13 -bb13: +bb13: ; preds = %bb, %bb11 unreachable } diff --git a/llvm/test/Transforms/LoopStrengthReduce/X86/eh-insertion-point.ll b/llvm/test/Transforms/LoopStrengthReduce/X86/eh-insertion-point.ll index 1b7748ccaa8a..b25e4c62ac96 100644 --- a/llvm/test/Transforms/LoopStrengthReduce/X86/eh-insertion-point.ll +++ b/llvm/test/Transforms/LoopStrengthReduce/X86/eh-insertion-point.ll @@ -10,19 +10,22 @@ declare void @use1(i1) define void @is_not_null(i8* %baseptr) local_unnamed_addr align 2 personality i8* undef { ; CHECK-LABEL: @is_not_null( ; CHECK-NEXT: preheader: +; CHECK-NEXT: [[BASEPTR1:%.*]] = ptrtoint i8* [[BASEPTR:%.*]] to i64 +; CHECK-NEXT: [[TMP0:%.*]] = sub i64 0, [[BASEPTR1]] +; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, i8* null, i64 [[TMP0]] ; CHECK-NEXT: br label [[HEADER:%.*]] ; CHECK: header: -; 
CHECK-NEXT: [[PTR:%.*]] = phi i8* [ [[INCPTR:%.*]], [[LATCH:%.*]] ], [ [[BASEPTR:%.*]], [[PREHEADER:%.*]] ] +; CHECK-NEXT: [[LSR_IV:%.*]] = phi i8* [ [[SCEVGEP2:%.*]], [[LATCH:%.*]] ], [ [[SCEVGEP]], [[PREHEADER:%.*]] ] ; CHECK-NEXT: invoke void @maybe_throws() ; CHECK-NEXT: to label [[LATCH]] unwind label [[LPAD:%.*]] ; CHECK: lpad: -; CHECK-NEXT: [[TMP0:%.*]] = landingpad { i8*, i32 } +; CHECK-NEXT: [[TMP1:%.*]] = landingpad { i8*, i32 } ; CHECK-NEXT: catch i8* null -; CHECK-NEXT: [[PTR_IS_NOT_NULL:%.*]] = icmp ne i8* [[PTR]], null +; CHECK-NEXT: [[PTR_IS_NOT_NULL:%.*]] = icmp ne i8* [[LSR_IV]], null ; CHECK-NEXT: call void @use1(i1 [[PTR_IS_NOT_NULL]]) ; CHECK-NEXT: ret void ; CHECK: latch: -; CHECK-NEXT: [[INCPTR]] = getelementptr inbounds i8, i8* [[PTR]], i64 1 +; CHECK-NEXT: [[SCEVGEP2]] = getelementptr i8, i8* [[LSR_IV]], i64 -1 ; CHECK-NEXT: br label [[HEADER]] ; preheader: diff --git a/llvm/test/Transforms/LoopVectorize/X86/cost-model-assert.ll b/llvm/test/Transforms/LoopVectorize/X86/cost-model-assert.ll index 8fcb5038761f..1b9c8c514691 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/cost-model-assert.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/cost-model-assert.ll @@ -16,17 +16,8 @@ define void @cff_index_load_offsets(i1 %cond, i8 %x, i8* %p) #0 { ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 [[COND:%.*]], label [[IF_THEN:%.*]], label [[EXIT:%.*]] ; CHECK: if.then: -; CHECK-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 undef, i64 4) -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[UMAX]], -1 -; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 2 -; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 8 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br i1 true, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 8 -; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], 
[[N_MOD_VF]] -; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[N_VEC]], 4 -; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, i8* null, i64 [[TMP3]] ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i8> poison, i8 [[X:%.*]], i32 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT]], <4 x i8> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x i8> poison, i8 [[X]], i32 0 @@ -34,68 +25,68 @@ define void @cff_index_load_offsets(i1 %cond, i8 %x, i8* %p) #0 { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 -; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, i8* null, i64 [[TMP5]] -; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 4 -; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 4 -; CHECK-NEXT: [[NEXT_GEP1:%.*]] = getelementptr i8, i8* null, i64 [[TMP7]] -; CHECK-NEXT: [[TMP8:%.*]] = zext <4 x i8> [[BROADCAST_SPLAT]] to <4 x i32> -; CHECK-NEXT: [[TMP9:%.*]] = zext <4 x i8> [[BROADCAST_SPLAT3]] to <4 x i32> -; CHECK-NEXT: [[TMP10:%.*]] = shl nuw <4 x i32> [[TMP8]], -; CHECK-NEXT: [[TMP11:%.*]] = shl nuw <4 x i32> [[TMP9]], -; CHECK-NEXT: [[TMP12:%.*]] = load i8, i8* [[P:%.*]], align 1, [[TBAA1:!tbaa !.*]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <4 x i8> poison, i8 [[TMP12]], i32 0 +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4 +; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, i8* null, i64 [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 +; CHECK-NEXT: [[NEXT_GEP1:%.*]] = getelementptr i8, i8* null, i64 [[TMP3]] +; CHECK-NEXT: [[TMP4:%.*]] = zext <4 x i8> [[BROADCAST_SPLAT]] to <4 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i8> 
[[BROADCAST_SPLAT3]] to <4 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = shl nuw <4 x i32> [[TMP4]], +; CHECK-NEXT: [[TMP7:%.*]] = shl nuw <4 x i32> [[TMP5]], +; CHECK-NEXT: [[TMP8:%.*]] = load i8, i8* [[P:%.*]], align 1, [[TBAA1:!tbaa !.*]] +; CHECK-NEXT: [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <4 x i8> poison, i8 [[TMP8]], i32 0 ; CHECK-NEXT: [[BROADCAST_SPLAT5:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT4]], <4 x i8> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP13:%.*]] = load i8, i8* [[P]], align 1, [[TBAA1]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT6:%.*]] = insertelement <4 x i8> poison, i8 [[TMP13]], i32 0 +; CHECK-NEXT: [[TMP9:%.*]] = load i8, i8* [[P]], align 1, [[TBAA1]] +; CHECK-NEXT: [[BROADCAST_SPLATINSERT6:%.*]] = insertelement <4 x i8> poison, i8 [[TMP9]], i32 0 ; CHECK-NEXT: [[BROADCAST_SPLAT7:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT6]], <4 x i8> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP14:%.*]] = zext <4 x i8> [[BROADCAST_SPLAT5]] to <4 x i32> -; CHECK-NEXT: [[TMP15:%.*]] = zext <4 x i8> [[BROADCAST_SPLAT7]] to <4 x i32> -; CHECK-NEXT: [[TMP16:%.*]] = shl nuw nsw <4 x i32> [[TMP14]], -; CHECK-NEXT: [[TMP17:%.*]] = shl nuw nsw <4 x i32> [[TMP15]], -; CHECK-NEXT: [[TMP18:%.*]] = or <4 x i32> [[TMP16]], [[TMP10]] -; CHECK-NEXT: [[TMP19:%.*]] = or <4 x i32> [[TMP17]], [[TMP11]] -; CHECK-NEXT: [[TMP20:%.*]] = load i8, i8* undef, align 1, [[TBAA1]] -; CHECK-NEXT: [[TMP21:%.*]] = load i8, i8* undef, align 1, [[TBAA1]] -; CHECK-NEXT: [[TMP22:%.*]] = or <4 x i32> [[TMP18]], zeroinitializer -; CHECK-NEXT: [[TMP23:%.*]] = or <4 x i32> [[TMP19]], zeroinitializer -; CHECK-NEXT: [[TMP24:%.*]] = or <4 x i32> [[TMP22]], zeroinitializer -; CHECK-NEXT: [[TMP25:%.*]] = or <4 x i32> [[TMP23]], zeroinitializer -; CHECK-NEXT: [[TMP26:%.*]] = extractelement <4 x i32> [[TMP24]], i32 0 -; CHECK-NEXT: store i32 [[TMP26]], i32* undef, align 4, [[TBAA4:!tbaa !.*]] -; CHECK-NEXT: [[TMP27:%.*]] = extractelement <4 x i32> [[TMP24]], i32 1 
+; CHECK-NEXT: [[TMP10:%.*]] = zext <4 x i8> [[BROADCAST_SPLAT5]] to <4 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = zext <4 x i8> [[BROADCAST_SPLAT7]] to <4 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = shl nuw nsw <4 x i32> [[TMP10]], +; CHECK-NEXT: [[TMP13:%.*]] = shl nuw nsw <4 x i32> [[TMP11]], +; CHECK-NEXT: [[TMP14:%.*]] = or <4 x i32> [[TMP12]], [[TMP6]] +; CHECK-NEXT: [[TMP15:%.*]] = or <4 x i32> [[TMP13]], [[TMP7]] +; CHECK-NEXT: [[TMP16:%.*]] = load i8, i8* undef, align 1, [[TBAA1]] +; CHECK-NEXT: [[TMP17:%.*]] = load i8, i8* undef, align 1, [[TBAA1]] +; CHECK-NEXT: [[TMP18:%.*]] = or <4 x i32> [[TMP14]], zeroinitializer +; CHECK-NEXT: [[TMP19:%.*]] = or <4 x i32> [[TMP15]], zeroinitializer +; CHECK-NEXT: [[TMP20:%.*]] = or <4 x i32> [[TMP18]], zeroinitializer +; CHECK-NEXT: [[TMP21:%.*]] = or <4 x i32> [[TMP19]], zeroinitializer +; CHECK-NEXT: [[TMP22:%.*]] = extractelement <4 x i32> [[TMP20]], i32 0 +; CHECK-NEXT: store i32 [[TMP22]], i32* undef, align 4, [[TBAA4:!tbaa !.*]] +; CHECK-NEXT: [[TMP23:%.*]] = extractelement <4 x i32> [[TMP20]], i32 1 +; CHECK-NEXT: store i32 [[TMP23]], i32* undef, align 4, [[TBAA4]] +; CHECK-NEXT: [[TMP24:%.*]] = extractelement <4 x i32> [[TMP20]], i32 2 +; CHECK-NEXT: store i32 [[TMP24]], i32* undef, align 4, [[TBAA4]] +; CHECK-NEXT: [[TMP25:%.*]] = extractelement <4 x i32> [[TMP20]], i32 3 +; CHECK-NEXT: store i32 [[TMP25]], i32* undef, align 4, [[TBAA4]] +; CHECK-NEXT: [[TMP26:%.*]] = extractelement <4 x i32> [[TMP21]], i32 0 +; CHECK-NEXT: store i32 [[TMP26]], i32* undef, align 4, [[TBAA4]] +; CHECK-NEXT: [[TMP27:%.*]] = extractelement <4 x i32> [[TMP21]], i32 1 ; CHECK-NEXT: store i32 [[TMP27]], i32* undef, align 4, [[TBAA4]] -; CHECK-NEXT: [[TMP28:%.*]] = extractelement <4 x i32> [[TMP24]], i32 2 +; CHECK-NEXT: [[TMP28:%.*]] = extractelement <4 x i32> [[TMP21]], i32 2 ; CHECK-NEXT: store i32 [[TMP28]], i32* undef, align 4, [[TBAA4]] -; CHECK-NEXT: [[TMP29:%.*]] = extractelement <4 x i32> [[TMP24]], i32 3 +; CHECK-NEXT: 
[[TMP29:%.*]] = extractelement <4 x i32> [[TMP21]], i32 3 ; CHECK-NEXT: store i32 [[TMP29]], i32* undef, align 4, [[TBAA4]] -; CHECK-NEXT: [[TMP30:%.*]] = extractelement <4 x i32> [[TMP25]], i32 0 -; CHECK-NEXT: store i32 [[TMP30]], i32* undef, align 4, [[TBAA4]] -; CHECK-NEXT: [[TMP31:%.*]] = extractelement <4 x i32> [[TMP25]], i32 1 -; CHECK-NEXT: store i32 [[TMP31]], i32* undef, align 4, [[TBAA4]] -; CHECK-NEXT: [[TMP32:%.*]] = extractelement <4 x i32> [[TMP25]], i32 2 -; CHECK-NEXT: store i32 [[TMP32]], i32* undef, align 4, [[TBAA4]] -; CHECK-NEXT: [[TMP33:%.*]] = extractelement <4 x i32> [[TMP25]], i32 3 -; CHECK-NEXT: store i32 [[TMP33]], i32* undef, align 4, [[TBAA4]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 8 -; CHECK-NEXT: [[TMP34:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP34]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP6:!llvm.loop !.*]] +; CHECK-NEXT: [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], 0 +; CHECK-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP6:!llvm.loop !.*]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1, 0 ; CHECK-NEXT: br i1 [[CMP_N]], label [[SW_EPILOG:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8* [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ null, [[IF_THEN]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8* [ null, [[MIDDLE_BLOCK]] ], [ null, [[IF_THEN]] ] ; CHECK-NEXT: br label [[FOR_BODY68:%.*]] ; CHECK: for.body68: ; CHECK-NEXT: [[P_359:%.*]] = phi i8* [ [[ADD_PTR86:%.*]], [[FOR_BODY68]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[CONV70:%.*]] = zext i8 [[X]] to i32 ; CHECK-NEXT: [[SHL71:%.*]] = shl nuw i32 [[CONV70]], 24 -; CHECK-NEXT: [[TMP35:%.*]] = load i8, i8* [[P]], align 1, [[TBAA1]] -; CHECK-NEXT: [[CONV73:%.*]] = zext i8 [[TMP35]] to i32 +; CHECK-NEXT: [[TMP31:%.*]] = load i8, i8* [[P]], align 1, [[TBAA1]] +; 
CHECK-NEXT: [[CONV73:%.*]] = zext i8 [[TMP31]] to i32 ; CHECK-NEXT: [[SHL74:%.*]] = shl nuw nsw i32 [[CONV73]], 16 ; CHECK-NEXT: [[OR75:%.*]] = or i32 [[SHL74]], [[SHL71]] -; CHECK-NEXT: [[TMP36:%.*]] = load i8, i8* undef, align 1, [[TBAA1]] +; CHECK-NEXT: [[TMP32:%.*]] = load i8, i8* undef, align 1, [[TBAA1]] ; CHECK-NEXT: [[SHL78:%.*]] = shl nuw nsw i32 undef, 8 ; CHECK-NEXT: [[OR79:%.*]] = or i32 [[OR75]], [[SHL78]] ; CHECK-NEXT: [[CONV81:%.*]] = zext i8 undef to i32 diff --git a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll index bed3ba555045..2e15655154db 100644 --- a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll +++ b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll @@ -12,11 +12,13 @@ define void @a(i8* readnone %b) { ; CHECK-NEXT: br i1 [[CMP_NOT4]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY_PREHEADER:%.*]] ; CHECK: for.body.preheader: ; CHECK-NEXT: [[TMP0:%.*]] = sub i64 0, [[B1]] -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 4 +; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, i8* null, i64 [[TMP0]] +; CHECK-NEXT: [[EXITCOUNT_PTRCNT_TO_INT:%.*]] = ptrtoint i8* [[SCEVGEP]] to i64 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[EXITCOUNT_PTRCNT_TO_INT]], 4 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 4 -; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]] +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[EXITCOUNT_PTRCNT_TO_INT]], 4 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[EXITCOUNT_PTRCNT_TO_INT]], [[N_MOD_VF]] ; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[N_VEC]], -1 ; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, i8* null, i64 [[TMP1]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] @@ -66,7 +68,7 @@ define void @a(i8* readnone %b) { ; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, i8* [[POINTER_PHI]], i64 -4 ; CHECK-NEXT: br i1 
[[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP0:!llvm.loop !.*]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[EXITCOUNT_PTRCNT_TO_INT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8* [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ null, [[FOR_BODY_PREHEADER]] ] diff --git a/llvm/unittests/Transforms/Utils/ScalarEvolutionExpanderTest.cpp b/llvm/unittests/Transforms/Utils/ScalarEvolutionExpanderTest.cpp index 249ba52413e0..56eb027a68ce 100644 --- a/llvm/unittests/Transforms/Utils/ScalarEvolutionExpanderTest.cpp +++ b/llvm/unittests/Transforms/Utils/ScalarEvolutionExpanderTest.cpp @@ -947,24 +947,31 @@ TEST_F(ScalarEvolutionExpanderTest, ExpandNonIntegralPtrWithNullBase) { Value *V = Exp.expandCodeFor(PtrPlus1, I.getType(), &I); I.replaceAllUsesWith(V); - // Check the expander created bitcast (gep i8* null, %offset). 
+ // Check that the expander created: + // define float addrspace(1)* @test(i64 %off) { + // %scevgep = getelementptr float, float addrspace(1)* null, i64 %off + // %scevgep1 = bitcast float addrspace(1)* %scevgep to i8 addrspace(1)* + // %uglygep = getelementptr i8, i8 addrspace(1)* %scevgep1, i64 1 + // %uglygep2 = bitcast i8 addrspace(1)* %uglygep to float addrspace(1)* + // %ptr = getelementptr inbounds float, float addrspace(1)* null, i64 %off + // ret float addrspace(1)* %uglygep2 + // } + auto *Cast = dyn_cast(V); EXPECT_TRUE(Cast); EXPECT_EQ(Cast->getType(), I.getType()); auto *GEP = dyn_cast(Cast->getOperand(0)); EXPECT_TRUE(GEP); - EXPECT_TRUE(cast(GEP->getPointerOperand())->isNullValue()); - EXPECT_EQ(cast(GEP->getPointerOperand()->getType()) + EXPECT_TRUE(match(GEP->getOperand(1), m_SpecificInt(1))); + auto *Cast1 = dyn_cast(GEP->getPointerOperand()); + EXPECT_TRUE(Cast1); + auto *GEP1 = dyn_cast(Cast1->getOperand(0)); + EXPECT_TRUE(GEP1); + EXPECT_TRUE(cast(GEP1->getPointerOperand())->isNullValue()); + EXPECT_EQ(GEP1->getOperand(1), &*F.arg_begin()); + EXPECT_EQ(cast(GEP1->getPointerOperand()->getType()) ->getAddressSpace(), cast(I.getType())->getAddressSpace()); - - // Check the expander created the expected index computation: add (shl - // %offset, 2), 1. 
- Value *Arg; - EXPECT_TRUE( - match(GEP->getOperand(1), - m_Add(m_Shl(m_Value(Arg), m_SpecificInt(2)), m_SpecificInt(1)))); - EXPECT_EQ(Arg, &*F.arg_begin()); EXPECT_FALSE(verifyFunction(F, &errs())); }); } diff --git a/polly/lib/Analysis/ScopBuilder.cpp b/polly/lib/Analysis/ScopBuilder.cpp index 4def17102925..e12261cb8164 100644 --- a/polly/lib/Analysis/ScopBuilder.cpp +++ b/polly/lib/Analysis/ScopBuilder.cpp @@ -1761,6 +1761,11 @@ bool ScopBuilder::buildAccessMemIntrinsic(MemAccInst Inst, ScopStmt *Stmt) { if (DestAccFunc->isZero()) return true; + if (auto *U = dyn_cast(DestAccFunc)) { + if (isa(U->getValue())) + return true; + } + auto *DestPtrSCEV = dyn_cast(SE.getPointerBase(DestAccFunc)); assert(DestPtrSCEV); DestAccFunc = SE.getMinusSCEV(DestAccFunc, DestPtrSCEV); @@ -1837,6 +1842,11 @@ bool ScopBuilder::buildAccessCallInst(MemAccInst Inst, ScopStmt *Stmt) { if (ArgSCEV->isZero()) continue; + if (auto *U = dyn_cast(ArgSCEV)) { + if (isa(U->getValue())) + return true; + } + auto *ArgBasePtr = cast(SE.getPointerBase(ArgSCEV)); addArrayAccess(Stmt, Inst, AccType, ArgBasePtr->getValue(), ArgBasePtr->getType(), false, {AF}, {nullptr}, CI); diff --git a/polly/lib/Support/SCEVAffinator.cpp b/polly/lib/Support/SCEVAffinator.cpp index 9d2e7d9a6fcb..1691b2aa242b 100644 --- a/polly/lib/Support/SCEVAffinator.cpp +++ b/polly/lib/Support/SCEVAffinator.cpp @@ -551,8 +551,15 @@ PWACtx SCEVAffinator::visitUnknown(const SCEVUnknown *Expr) { } } - llvm_unreachable( - "Unknowns SCEV was neither parameter nor a valid instruction."); + if (isa(Expr->getValue())) { + isl::val v{Ctx, 0}; + isl::space Space{Ctx, 0, NumIterators}; + isl::local_space ls{Space}; + return getPWACtxFromPWA(isl::aff(ls, v)); + } + + llvm_unreachable("Unknowns SCEV was neither a parameter, a constant nor a " + "valid instruction."); } PWACtx SCEVAffinator::complexityBailout() { diff --git a/polly/lib/Support/SCEVValidator.cpp b/polly/lib/Support/SCEVValidator.cpp index 94b55167a9b4..0e0ec7358571 100644 
--- a/polly/lib/Support/SCEVValidator.cpp +++ b/polly/lib/Support/SCEVValidator.cpp @@ -461,6 +461,11 @@ public: } } + if (Expr->getType()->isPointerTy()) { + if (isa(V)) + return ValidatorResult(SCEVType::INT); // "int" + } + return ValidatorResult(SCEVType::PARAM, Expr); } }; diff --git a/polly/test/Isl/CodeGen/partial_write_impossible_restriction.ll b/polly/test/Isl/CodeGen/partial_write_impossible_restriction.ll index e4c2ce20b82b..3b17518a3ef4 100644 --- a/polly/test/Isl/CodeGen/partial_write_impossible_restriction.ll +++ b/polly/test/Isl/CodeGen/partial_write_impossible_restriction.ll @@ -7,7 +7,7 @@ ; target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" -define void @partial_write_impossible_restriction() { +define void @partial_write_impossible_restriction(i32* %.pn) { entry: br i1 undef, label %invoke.cont258, label %cond.true.i.i.i.i1007 @@ -15,7 +15,6 @@ cond.true.i.i.i.i1007: br label %invoke.cont258 invoke.cont258: - %.pn = phi i32* [ null, %cond.true.i.i.i.i1007 ], [ null, %entry ] br label %invoke.cont274 invoke.cont274: ; preds = %invoke.cont258 @@ -49,11 +48,11 @@ if.then.i.i1141.loopexit: ; preds = %cond.end ; CHECK-LABEL: polly.stmt.cond.false: -; CHECK: %polly.access..pn2 = getelementptr i32, i32* %.pn, i64 %polly.indvar -; CHECK: store i32 %cond.in.sroa.speculate.load.cond.false_p_scalar_, i32* %polly.access..pn2, align 4, !alias.scope !0, !noalias !2 +; CHECK: %polly.access..pn{{[0-9]*}} = getelementptr i32, i32* %.pn, i64 %polly.indvar +; CHECK: store i32 %cond.in.sroa.speculate.load.cond.false_p_scalar_, i32* %polly.access..pn{{[0-9]*}}, align 4, !alias.scope !0, !noalias !2 ; CHECK: br label %polly.merge -; CHECK-LABEL: polly.stmt.cond.false11: -; CHECK: %polly.access..pn14 = getelementptr i32, i32* %.pn, i64 0 -; CHECK: store i32 %cond.in.sroa.speculate.load.cond.false_p_scalar_13, i32* %polly.access..pn14, align 4, !alias.scope !0, !noalias !2 -; CHECK: br label %polly.stmt.cond.end15 +; CHECK-LABEL: 
polly.stmt.cond.false{{[0-9]*}}: +; CHECK: %polly.access..pn{{[0-9]*}} = getelementptr i32, i32* %.pn, i64 0 +; CHECK: store i32 %cond.in.sroa.speculate.load.cond.false_p_scalar_{{[0-9]*}}, i32* %polly.access..pn{{[0-9]*}}, align 4, !alias.scope !0, !noalias !2 +; CHECK: br label %polly.stmt.cond.end{{[0-9]*}} diff --git a/polly/test/Isl/CodeGen/scev_looking_through_bitcasts.ll b/polly/test/Isl/CodeGen/scev_looking_through_bitcasts.ll index 1012e23cd3a2..3521f0060c8d 100644 --- a/polly/test/Isl/CodeGen/scev_looking_through_bitcasts.ll +++ b/polly/test/Isl/CodeGen/scev_looking_through_bitcasts.ll @@ -31,7 +31,9 @@ bitmap_element_allocate.exit: } -; CHECK: polly.stmt.cond.end73.i: -; CHECK-NEXT: %0 = bitcast %structty** %b.s2a to i8** -; CHECK-NEXT: store i8* undef, i8** %0 + +; CHECK: polly.stmt.cond.end73.i: +; CHECK-NEXT: %scevgep = getelementptr i8, i8* null, i64 %a +; CHECK-NEXT: %scevgep1 = bitcast i8* %scevgep to %structty* +; CHECK-NEXT: store %structty* %scevgep1, %structty** %b.s2a, align 8 ; CHECK-NEXT: br label %polly.exiting