From 9a432161c68774e6c717616e3d688142e89bbb42 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Sun, 24 Nov 2019 19:24:10 +0000 Subject: [PATCH] [LoopInterchange] Adjust assertions when updating successors. Currently the assertion in updateSuccessor is overly strict in some cases and overly relaxed in other cases. For branches to the inner and outer loop preheader it is too strict, because they can either be unconditional branches or conditional branches with duplicate targets. Both cases are fine and we can allow updating multiple successors. On the other hand, we have to at least update one successor. This patch adds such an assertion. --- .../lib/Transforms/Scalar/LoopInterchange.cpp | 54 ++++--- .../update-condbranch-duplicate-successors.ll | 145 ++++++++++++++++++ 2 files changed, 180 insertions(+), 19 deletions(-) create mode 100644 llvm/test/Transforms/LoopInterchange/update-condbranch-duplicate-successors.ll diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp index 352aff19965e..2c4937b6bef2 100644 --- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp +++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp @@ -1316,25 +1316,32 @@ static void moveBBContents(BasicBlock *FromBB, Instruction *InsertBefore) { FromBB->getTerminator()->getIterator()); } -/// Update BI to jump to NewBB instead of OldBB. Records updates to -/// the dominator tree in DTUpdates, if DT should be preserved. +// Update BI to jump to NewBB instead of OldBB. Records updates to the +// dominator tree in DTUpdates. If \p MustUpdateOnce is true, assert that +// \p OldBB is exactly once in BI's successor list. static void updateSuccessor(BranchInst *BI, BasicBlock *OldBB, BasicBlock *NewBB, - std::vector &DTUpdates) { - assert(llvm::count_if(successors(BI), - [OldBB](BasicBlock *BB) { return BB == OldBB; }) < 2 && - "BI must jump to OldBB at most once."); - for (unsigned i = 0, e = BI->getNumSuccessors(); i < e; ++i) { - if (BI->getSuccessor(i) == OldBB) { - BI->setSuccessor(i, NewBB); - - DTUpdates.push_back( - {DominatorTree::UpdateKind::Insert, BI->getParent(), NewBB}); - DTUpdates.push_back( - {DominatorTree::UpdateKind::Delete, BI->getParent(), OldBB}); - break; + std::vector &DTUpdates, + bool MustUpdateOnce = true) { + assert((!MustUpdateOnce || + llvm::count_if(successors(BI), + [OldBB](BasicBlock *BB) { + return BB == OldBB; + }) == 1) && "BI must jump to OldBB exactly once."); + bool Changed = false; + for (Use &Op : BI->operands()) + if (Op == OldBB) { + Op.set(NewBB); + Changed = true; } + + if (Changed) { + DTUpdates.push_back( + {DominatorTree::UpdateKind::Insert, BI->getParent(), NewBB}); + DTUpdates.push_back( + {DominatorTree::UpdateKind::Delete, BI->getParent(), OldBB}); } + assert(Changed && "Expected a successor to be updated"); } // Move Lcssa PHIs to the right place. @@ -1481,12 +1488,21 @@ bool LoopInterchangeTransform::adjustLoopBranches() { if (!InnerLoopHeaderSuccessor) return false; - // Adjust Loop Preheader and headers + // Adjust Loop Preheader and headers. + // The branches in the outer loop predecessor and the outer loop header can + // be unconditional branches or conditional branches with duplicates. Consider + // this when updating the successors. updateSuccessor(OuterLoopPredecessorBI, OuterLoopPreHeader, - InnerLoopPreHeader, DTUpdates); - updateSuccessor(OuterLoopHeaderBI, OuterLoopLatch, LoopExit, DTUpdates); + InnerLoopPreHeader, DTUpdates, /*MustUpdateOnce=*/false); + // The outer loop header might or might not branch to the outer latch. + // We are guaranteed to branch to the inner loop preheader. + if (std::find(succ_begin(OuterLoopHeaderBI), succ_end(OuterLoopHeaderBI), + OuterLoopLatch) != succ_end(OuterLoopHeaderBI)) + updateSuccessor(OuterLoopHeaderBI, OuterLoopLatch, LoopExit, DTUpdates, + /*MustUpdateOnce=*/false); updateSuccessor(OuterLoopHeaderBI, InnerLoopPreHeader, - InnerLoopHeaderSuccessor, DTUpdates); + InnerLoopHeaderSuccessor, DTUpdates, + /*MustUpdateOnce=*/false); // Adjust reduction PHI's now that the incoming block has changed. InnerLoopHeaderSuccessor->replacePhiUsesWith(InnerLoopHeader, diff --git a/llvm/test/Transforms/LoopInterchange/update-condbranch-duplicate-successors.ll b/llvm/test/Transforms/LoopInterchange/update-condbranch-duplicate-successors.ll new file mode 100644 index 000000000000..3f178443ee6d --- /dev/null +++ b/llvm/test/Transforms/LoopInterchange/update-condbranch-duplicate-successors.ll @@ -0,0 +1,145 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -loop-interchange -S %s | FileCheck %s + + +@global = external dso_local global [1000 x [1000 x i32]], align 16 + +; Test that we support updating conditional branches where both targets are the same +; in the predecessor of the outer loop header. + +define void @foo(i1 %cmp) { +; CHECK-LABEL: @foo( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[CMP:%.*]], label [[INNER_HEADER_PREHEADER:%.*]], label [[INNER_HEADER_PREHEADER]] +; CHECK: bb1: +; CHECK-NEXT: br label [[OUTER_HEADER:%.*]] +; CHECK: outer.header: +; CHECK-NEXT: [[OUTER_IV:%.*]] = phi i64 [ 0, [[BB1:%.*]] ], [ [[OUTER_IV_NEXT:%.*]], [[OUTER_LATCH:%.*]] ] +; CHECK-NEXT: br label [[INNER_HEADER_SPLIT1:%.*]] +; CHECK: inner.header.preheader: +; CHECK-NEXT: br label [[INNER_HEADER:%.*]] +; CHECK: inner.header: +; CHECK-NEXT: [[INNER_IV:%.*]] = phi i64 [ [[TMP0:%.*]], [[INNER_HEADER_SPLIT:%.*]] ], [ 5, [[INNER_HEADER_PREHEADER]] ] +; CHECK-NEXT: br label [[BB1]] +; CHECK: inner.header.split1: +; CHECK-NEXT: [[PTR:%.*]] = getelementptr inbounds [1000 x [1000 x i32]], [1000 x [1000 x i32]]* @global, i64 0, i64 [[INNER_IV]], i64 [[OUTER_IV]] +; CHECK-NEXT: [[LV:%.*]] = load i32, i32* [[PTR]] +; CHECK-NEXT: [[V:%.*]] = mul i32 [[LV]], 100 +; CHECK-NEXT: store i32 [[V]], i32* [[PTR]] +; CHECK-NEXT: [[INNER_IV_NEXT:%.*]] = add nsw i64 [[INNER_IV]], 1 +; CHECK-NEXT: [[COND1:%.*]] = icmp eq i64 [[INNER_IV_NEXT]], 1000 +; CHECK-NEXT: br label [[OUTER_LATCH]] +; CHECK: inner.header.split: +; CHECK-NEXT: [[TMP0]] = add nsw i64 [[INNER_IV]], 1 +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 [[TMP0]], 1000 +; CHECK-NEXT: br i1 [[TMP1]], label [[BB9:%.*]], label [[INNER_HEADER]] +; CHECK: outer.latch: +; CHECK-NEXT: [[OUTER_IV_NEXT]] = add nuw nsw i64 [[OUTER_IV]], 1 +; CHECK-NEXT: [[COND2:%.*]] = icmp eq i64 [[OUTER_IV_NEXT]], 1000 +; CHECK-NEXT: br i1 [[COND2]], label [[INNER_HEADER_SPLIT]], label [[OUTER_HEADER]] +; CHECK: bb9: +; CHECK-NEXT: br label [[BB10:%.*]] +; CHECK: bb10: +; CHECK-NEXT: ret void +; +entry: + br i1 %cmp, label %bb1, label %bb1 + +bb1: ; preds = %entry, %entry + br label %outer.header + +outer.header: ; preds = %outer.latch, %bb1 + %outer.iv = phi i64 [ 0, %bb1], [ %outer.iv.next, %outer.latch ] + br label %inner.header + +inner.header: ; preds = %inner.header, %outer.header + %inner.iv = phi i64 [ %inner.iv.next, %inner.header ], [ 5, %outer.header ] + %ptr = getelementptr inbounds [1000 x [1000 x i32]], [1000 x [1000 x i32]]* @global, i64 0, i64 %inner.iv, i64 %outer.iv + %lv = load i32, i32* %ptr + %v = mul i32 %lv, 100 + store i32 %v, i32* %ptr + %inner.iv.next = add nsw i64 %inner.iv, 1 + %cond1 = icmp eq i64 %inner.iv.next , 1000 + br i1 %cond1, label %outer.latch, label %inner.header + +outer.latch: ; preds = %inner.header + %outer.iv.next = add nuw nsw i64 %outer.iv, 1 + %cond2 = icmp eq i64 %outer.iv.next, 1000 + br i1 %cond2, label %bb9, label %outer.header + +bb9: ; preds = %outer.latch + br label %bb10 + +bb10: ; preds = %bb9 + ret void +} + + +define void @foo1(i1 %cmp) { +; CHECK-LABEL: @foo1( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[CMP:%.*]], label [[BB1:%.*]], label [[BB1]] +; CHECK: bb1: +; CHECK-NEXT: br i1 [[CMP]], label [[INNER_HEADER_PREHEADER:%.*]], label [[INNER_HEADER_PREHEADER]] +; CHECK: outer.header.preheader: +; CHECK-NEXT: br label [[OUTER_HEADER:%.*]] +; CHECK: outer.header: +; CHECK-NEXT: [[OUTER_IV:%.*]] = phi i64 [ [[OUTER_IV_NEXT:%.*]], [[OUTER_LATCH:%.*]] ], [ 0, [[OUTER_HEADER_PREHEADER:%.*]] ] +; CHECK-NEXT: br i1 [[CMP]], label [[INNER_HEADER_SPLIT1:%.*]], label [[INNER_HEADER_SPLIT1]] +; CHECK: inner.header.preheader: +; CHECK-NEXT: br label [[INNER_HEADER:%.*]] +; CHECK: inner.header: +; CHECK-NEXT: [[INNER_IV:%.*]] = phi i64 [ [[TMP0:%.*]], [[INNER_HEADER_SPLIT:%.*]] ], [ 5, [[INNER_HEADER_PREHEADER]] ] +; CHECK-NEXT: br label [[OUTER_HEADER_PREHEADER]] +; CHECK: inner.header.split1: +; CHECK-NEXT: [[PTR:%.*]] = getelementptr inbounds [1000 x [1000 x i32]], [1000 x [1000 x i32]]* @global, i64 0, i64 [[INNER_IV]], i64 [[OUTER_IV]] +; CHECK-NEXT: [[LV:%.*]] = load i32, i32* [[PTR]] +; CHECK-NEXT: [[V:%.*]] = mul i32 [[LV]], 100 +; CHECK-NEXT: store i32 [[V]], i32* [[PTR]] +; CHECK-NEXT: [[INNER_IV_NEXT:%.*]] = add nsw i64 [[INNER_IV]], 1 +; CHECK-NEXT: [[COND1:%.*]] = icmp eq i64 [[INNER_IV_NEXT]], 1000 +; CHECK-NEXT: br label [[OUTER_LATCH]] +; CHECK: inner.header.split: +; CHECK-NEXT: [[TMP0]] = add nsw i64 [[INNER_IV]], 1 +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 [[TMP0]], 1000 +; CHECK-NEXT: br i1 [[TMP1]], label [[BB9:%.*]], label [[INNER_HEADER]] +; CHECK: outer.latch: +; CHECK-NEXT: [[OUTER_IV_NEXT]] = add nuw nsw i64 [[OUTER_IV]], 1 +; CHECK-NEXT: [[COND2:%.*]] = icmp eq i64 [[OUTER_IV_NEXT]], 1000 +; CHECK-NEXT: br i1 [[COND2]], label [[INNER_HEADER_SPLIT]], label [[OUTER_HEADER]] +; CHECK: bb9: +; CHECK-NEXT: br label [[BB10:%.*]] +; CHECK: bb10: +; CHECK-NEXT: ret void +; +entry: + br i1 %cmp, label %bb1, label %bb1 + +bb1: ; preds = %entry, %entry + br i1 %cmp, label %outer.header, label %outer.header + +outer.header: ; preds = %outer.latch, %bb1 + %outer.iv = phi i64 [ 0, %bb1 ], [ 0, %bb1 ], [ %outer.iv.next, %outer.latch ] + br i1 %cmp, label %inner.header, label %inner.header + +inner.header: ; preds = %inner.header, %outer.header + %inner.iv = phi i64 [ %inner.iv.next, %inner.header ], [ 5, %outer.header ], [ 5, %outer.header ] + %ptr = getelementptr inbounds [1000 x [1000 x i32]], [1000 x [1000 x i32]]* @global, i64 0, i64 %inner.iv, i64 %outer.iv + %lv = load i32, i32* %ptr + %v = mul i32 %lv, 100 + store i32 %v, i32* %ptr + %inner.iv.next = add nsw i64 %inner.iv, 1 + %cond1 = icmp eq i64 %inner.iv.next , 1000 + br i1 %cond1, label %outer.latch, label %inner.header + +outer.latch: ; preds = %inner.header + %outer.iv.next = add nuw nsw i64 %outer.iv, 1 + %cond2 = icmp eq i64 %outer.iv.next, 1000 + br i1 %cond2, label %bb9, label %outer.header + +bb9: ; preds = %outer.latch + br label %bb10 + +bb10: ; preds = %bb9 + ret void +}