From d7389d626109cc0d611056da8238fa502e12f57c Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Thu, 15 Dec 2016 05:08:57 +0000 Subject: [PATCH] [MachineBlockPlacement] Don't make blocks "uneditable" Summary: This fixes an issue with MachineBlockPlacement due to a badly timed call to `analyzeBranch` with `AllowModify` set to true. The timeline is as follows: 1. `MachineBlockPlacement::maybeTailDuplicateBlock` calls `TailDup.shouldTailDuplicate` on its argument, which in turn calls `analyzeBranch` with `AllowModify` set to true. 2. This `analyzeBranch` call edits the terminator sequence of the block based on the physical layout of the machine function, turning an unanalyzable non-fallthrough block to a unanalyzable fallthrough block. Normally MBP bails out of rearranging such blocks, but this block was unanalyzable non-fallthrough (and thus rearrangeable) the first time MBP looked at it, and so it goes ahead and decides where it should be placed in the function. 3. When placing this block MBP fails to analyze and thus update the block in keeping with the new physical layout. Concretely, before (1) we have something like: ``` LBL0: < unknown terminator op that may branch to LBL1 > jmp LBL1 LBL1: ... A LBL2: ... B ``` In (2), analyze branch simplifies this to ``` LBL0: < unknown terminator op that may branch to LBL2 > ;; jmp LBL1 <- redundant jump removed LBL1: ... A LBL2: ... B ``` In (3), MachineBlockPlacement goes ahead with its plan of putting LBL2 after the first block since that is profitable. ``` LBL0: < unknown terminator op that may branch to LBL2 > ;; jmp LBL1 <- redundant jump LBL2: ... B LBL1: ... A ``` and the program now has incorrect behavior (we no longer fall-through from `LBL0` to `LBL1`) because MBP can no longer edit LBL0. There are several possible solutions, but I went with removing the teeth off of the `analyzeBranch` calls in TailDuplicator. That makes thinking about the result of these calls easier, and breaks nothing in the lit test suite. I've also added some bookkeeping to the MachineBlockPlacement pass and used that to write an assert that would have caught this. Reviewers: chandlerc, gberry, MatzeB, iteratee Subscribers: mcrosier, llvm-commits Differential Revision: https://reviews.llvm.org/D27783 llvm-svn: 289764 --- llvm/lib/CodeGen/MachineBlockPlacement.cpp | 22 ++++++ llvm/lib/CodeGen/TailDuplicator.cpp | 14 ++-- llvm/test/CodeGen/X86/block-placement.mir | 87 ++++++++++++++++++++++ 3 files changed, 116 insertions(+), 7 deletions(-) create mode 100644 llvm/test/CodeGen/X86/block-placement.mir diff --git a/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/llvm/lib/CodeGen/MachineBlockPlacement.cpp index 32c0bcf310cd..89ca641cfe0a 100644 --- a/llvm/lib/CodeGen/MachineBlockPlacement.cpp +++ b/llvm/lib/CodeGen/MachineBlockPlacement.cpp @@ -324,6 +324,14 @@ class MachineBlockPlacement : public MachineFunctionPass { /// between basic blocks. DenseMap BlockToChain; +#ifndef NDEBUG + /// The set of basic blocks that have terminators that cannot be fully + /// analyzed. These basic blocks cannot be re-ordered safely by + /// MachineBlockPlacement, and we must preserve physical layout of these + /// blocks and their successors through the pass. + SmallPtrSet BlocksWithUnanalyzableExits; +#endif + /// Decrease the UnscheduledPredecessors count for all blocks in chain, and /// if the count goes to 0, add them to the appropriate work list. void markChainSuccessors(BlockChain &Chain, MachineBasicBlock *LoopHeaderBB, @@ -1589,6 +1597,7 @@ void MachineBlockPlacement::buildCFGChains() { << getBlockName(BB) << " -> " << getBlockName(NextBB) << "\n"); Chain->merge(NextBB, nullptr); + BlocksWithUnanalyzableExits.insert(&*BB); FI = NextFI; BB = NextBB; } @@ -1661,6 +1670,19 @@ void MachineBlockPlacement::buildCFGChains() { Cond.clear(); MachineBasicBlock *TBB = nullptr, *FBB = nullptr; // For AnalyzeBranch. +#ifndef NDEBUG + if (!BlocksWithUnanalyzableExits.count(PrevBB)) { + // Given the exact block placement we chose, we may actually not _need_ to + // be able to edit PrevBB's terminator sequence, but not being _able_ to + // do that at this point is a bug. + assert((!TII->analyzeBranch(*PrevBB, TBB, FBB, Cond) || + !PrevBB->canFallThrough()) && + "Unexpected block with un-analyzable fallthrough!"); + Cond.clear(); + TBB = FBB = nullptr; + } +#endif + // The "PrevBB" is not yet updated to reflect current code layout, so, // o. it may fall-through to a block without explicit "goto" instruction // before layout, and no longer fall-through it after layout; or diff --git a/llvm/lib/CodeGen/TailDuplicator.cpp b/llvm/lib/CodeGen/TailDuplicator.cpp index 06aa5e1fc0de..7709236bbaa8 100644 --- a/llvm/lib/CodeGen/TailDuplicator.cpp +++ b/llvm/lib/CodeGen/TailDuplicator.cpp @@ -550,8 +550,8 @@ bool TailDuplicator::shouldTailDuplicate(bool IsSimple, // blocks with unanalyzable fallthrough get layed out contiguously. MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr; SmallVector PredCond; - if (TII->analyzeBranch(TailBB, PredTBB, PredFBB, PredCond, true) - && TailBB.canFallThrough()) + if (TII->analyzeBranch(TailBB, PredTBB, PredFBB, PredCond) && + TailBB.canFallThrough()) return false; // If the target has hardware branch prediction that can handle indirect @@ -660,7 +660,7 @@ bool TailDuplicator::canCompletelyDuplicateBB(MachineBasicBlock &BB) { MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr; SmallVector PredCond; - if (TII->analyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true)) + if (TII->analyzeBranch(*PredBB, PredTBB, PredFBB, PredCond)) return false; if (!PredCond.empty()) @@ -687,7 +687,7 @@ bool TailDuplicator::duplicateSimpleBB( MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr; SmallVector PredCond; - if (TII->analyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true)) + if (TII->analyzeBranch(*PredBB, PredTBB, PredFBB, PredCond)) continue; Changed = true; @@ -750,7 +750,7 @@ bool TailDuplicator::canTailDuplicate(MachineBasicBlock *TailBB, MachineBasicBlock *PredTBB, *PredFBB; SmallVector PredCond; - if (TII->analyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true)) + if (TII->analyzeBranch(*PredBB, PredTBB, PredFBB, PredCond)) return false; if (!PredCond.empty()) return false; @@ -833,7 +833,7 @@ bool TailDuplicator::tailDuplicate(bool IsSimple, MachineBasicBlock *TailBB, // Simplify MachineBasicBlock *PredTBB, *PredFBB; SmallVector PredCond; - TII->analyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true); + TII->analyzeBranch(*PredBB, PredTBB, PredFBB, PredCond); NumTailDupAdded += TailBB->size() - 1; // subtract one for removed branch @@ -861,7 +861,7 @@ bool TailDuplicator::tailDuplicate(bool IsSimple, MachineBasicBlock *TailBB, if (PrevBB->succ_size() == 1 && // Layout preds are not always CFG preds. Check. *PrevBB->succ_begin() == TailBB && - !TII->analyzeBranch(*PrevBB, PriorTBB, PriorFBB, PriorCond, true) && + !TII->analyzeBranch(*PrevBB, PriorTBB, PriorFBB, PriorCond) && PriorCond.empty() && (!PriorTBB || PriorTBB == TailBB) && TailBB->pred_size() == 1 && diff --git a/llvm/test/CodeGen/X86/block-placement.mir b/llvm/test/CodeGen/X86/block-placement.mir new file mode 100644 index 000000000000..d225167298f0 --- /dev/null +++ b/llvm/test/CodeGen/X86/block-placement.mir @@ -0,0 +1,87 @@ +# RUN: llc -O3 -run-pass=block-placement -o - %s | FileCheck %s + +--- | + ; ModuleID = 'test.ll' + source_filename = "test.ll" + target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + + declare void @stub(i32*) + + define i32 @f(i32* %ptr, i1 %cond) { + entry: + br i1 %cond, label %left, label %right + + left: ; preds = %entry + %is_null = icmp eq i32* %ptr, null + br i1 %is_null, label %null, label %not_null, !prof !0, !make.implicit !1 + + not_null: ; preds = %left + %val = load i32, i32* %ptr + ret i32 %val + + null: ; preds = %left + call void @stub(i32* %ptr) + unreachable + + right: ; preds = %entry + call void @stub(i32* null) + unreachable + } + + ; Function Attrs: nounwind + declare void @llvm.stackprotector(i8*, i8**) #0 + + attributes #0 = { nounwind } + + !0 = !{!"branch_weights", i32 1048575, i32 1} + !1 = !{} + +... +--- +# CHECK: name: f +name: f +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '%rdi' } + - { reg: '%esi' } + +# CHECK: %eax = FAULTING_LOAD_OP %bb.3.null, 1684, killed %rdi, 1, _, 0, _ :: (load 4 from %ir.ptr) +# CHECK-NEXT: JMP_1 %bb.2.not_null +# CHECK: bb.3.null: +# CHECK: bb.4.right: +# CHECK: bb.2.not_null: + +body: | + bb.0.entry: + successors: %bb.1.left(0x7ffff800), %bb.3.right(0x00000800) + liveins: %esi, %rdi + + frame-setup PUSH64r undef %rax, implicit-def %rsp, implicit %rsp + CFI_INSTRUCTION def_cfa_offset 16 + TEST8ri %sil, 1, implicit-def %eflags, implicit killed %esi + JE_1 %bb.3.right, implicit killed %eflags + + bb.1.left: + successors: %bb.2.null(0x7ffff800), %bb.4.not_null(0x00000800) + liveins: %rdi + + %eax = FAULTING_LOAD_OP %bb.2.null, 1684, killed %rdi, 1, _, 0, _ :: (load 4 from %ir.ptr) + JMP_1 %bb.4.not_null + + bb.4.not_null: + liveins: %rdi, %eax + + %rcx = POP64r implicit-def %rsp, implicit %rsp + RETQ %eax + + bb.2.null: + liveins: %rdi + + CALL64pcrel32 @stub, csr_64, implicit %rsp, implicit %rdi, implicit-def %rsp + + bb.3.right: + dead %edi = XOR32rr undef %edi, undef %edi, implicit-def dead %eflags, implicit-def %rdi + CALL64pcrel32 @stub, csr_64, implicit %rsp, implicit %rdi, implicit-def %rsp + +...