From 51387a8c28ca43ceadd29aa58fde8fe1103f9a2a Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Tue, 13 Dec 2016 01:21:15 +0000 Subject: [PATCH] [Statepoints] Reuse stack slots more than once within a basic block The stack slot reuse code had a really amusing bug. We ended up only reusing a stack slot exactly once (initial use + reuse) within a basic block. If we had a third statepoint to process, we ended up allocating a new set of stack slots. If we crossed a basic block boundary, the set got cleared. As a result, code which is invoke-heavy doesn't see the problem, but multiple calls within a basic block do. Net result: as we optimize invokes into calls, lowering gets worse. The root error here is that the bitmap used by the custom allocator wasn't kept in sync. The result was that we ended up resizing the bitmap on the next statepoint (to handle the cross block case), reset the bit once, but then never reset it again. Differential Revision: https://reviews.llvm.org/D25243 llvm-svn: 289509 --- .../SelectionDAG/StatepointLowering.cpp | 13 +++++--- .../CodeGen/X86/statepoint-stack-usage.ll | 30 +++++++++++++++++++ 2 files changed, 39 insertions(+), 4 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp index d438e6e7f35f..d27e2455978d 100644 --- a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp @@ -55,7 +55,8 @@ void StatepointLoweringState::startNewStatepoint(SelectionDAGBuilder &Builder) { NextSlotToAllocate = 0; // Need to resize this on each safepoint - we need the two to stay in sync and // the clear patterns of a SelectionDAGBuilder have no relation to - // FunctionLoweringInfo. SmallBitVector::reset initializes all bits to false. + // FunctionLoweringInfo. Also need to ensure used bits get cleared. 
+ AllocatedStackSlots.clear(); AllocatedStackSlots.resize(Builder.FuncInfo.StatepointStackSlots.size()); } @@ -82,9 +83,8 @@ StatepointLoweringState::allocateStackSlot(EVT ValueType, const size_t NumSlots = AllocatedStackSlots.size(); assert(NextSlotToAllocate <= NumSlots && "Broken invariant"); - // The stack slots in StatepointStackSlots beyond the first NumSlots were - // added in this instance of StatepointLoweringState, and cannot be re-used. - assert(NumSlots <= Builder.FuncInfo.StatepointStackSlots.size() && + assert(AllocatedStackSlots.size() == + Builder.FuncInfo.StatepointStackSlots.size() && "Broken invariant"); for (; NextSlotToAllocate < NumSlots; NextSlotToAllocate++) { @@ -92,6 +92,7 @@ StatepointLoweringState::allocateStackSlot(EVT ValueType, const int FI = Builder.FuncInfo.StatepointStackSlots[NextSlotToAllocate]; if (MFI.getObjectSize(FI) == SpillSize) { AllocatedStackSlots.set(NextSlotToAllocate); + // TODO: Is ValueType the right thing to use here? return Builder.DAG.getFrameIndex(FI, ValueType); } } @@ -104,6 +105,10 @@ StatepointLoweringState::allocateStackSlot(EVT ValueType, MFI.markAsStatepointSpillSlotObjectIndex(FI); Builder.FuncInfo.StatepointStackSlots.push_back(FI); + AllocatedStackSlots.resize(AllocatedStackSlots.size()+1, true); + assert(AllocatedStackSlots.size() == + Builder.FuncInfo.StatepointStackSlots.size() && + "Broken invariant"); StatepointMaxSlotsRequired = std::max( StatepointMaxSlotsRequired, Builder.FuncInfo.StatepointStackSlots.size()); diff --git a/llvm/test/CodeGen/X86/statepoint-stack-usage.ll b/llvm/test/CodeGen/X86/statepoint-stack-usage.ll index d4bc7d47f669..5c27898f284a 100644 --- a/llvm/test/CodeGen/X86/statepoint-stack-usage.ll +++ b/llvm/test/CodeGen/X86/statepoint-stack-usage.ll @@ -54,6 +54,36 @@ define i32 @reserve_first(i32 addrspace(1)* %a, i32 addrspace(1)* %b, i32 addrsp ret i32 1 } +; Check that we reuse the same stack slot across multiple calls. The use of +; more than two calls here is critical. 
We've had a bug which allowed reuse +; exactly once which went undetected for a long time. +define i32 @back_to_back_deopt(i32 %a, i32 %b, i32 %c) #1 + gc "statepoint-example" { +; CHECK-LABEL: back_to_back_deopt +; The exact stores don't matter, but there need to be three stack slots created +; CHECK: movl %ebx, 12(%rsp) +; CHECK: movl %ebp, 8(%rsp) +; CHECK: movl %r14d, 4(%rsp) +; CHECK: callq +; CHECK: movl %ebx, 12(%rsp) +; CHECK: movl %ebp, 8(%rsp) +; CHECK: movl %r14d, 4(%rsp) +; CHECK: callq +; CHECK: movl %ebx, 12(%rsp) +; CHECK: movl %ebp, 8(%rsp) +; CHECK: movl %r14d, 4(%rsp) +; CHECK: callq +; CHECK: movl %ebx, 12(%rsp) +; CHECK: movl %ebp, 8(%rsp) +; CHECK: movl %r14d, 4(%rsp) +; CHECK: callq + call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* undef, i32 0, i32 0, i32 0, i32 3, i32 %a, i32 %b, i32 %c) +call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* undef, i32 0, i32 0, i32 0, i32 3, i32 %a, i32 %b, i32 %c) +call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* undef, i32 0, i32 0, i32 0, i32 3, i32 %a, i32 %b, i32 %c) +call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* undef, i32 0, i32 0, i32 0, i32 3, i32 %a, i32 %b, i32 %c) + ret i32 1 +} + ; Test that stack slots are reused for invokes define i32 @back_to_back_invokes(i32 addrspace(1)* %a, i32 addrspace(1)* %b, i32 addrspace(1)* %c) #1 gc "statepoint-example" personality i32 ()* @"personality_function" { ; CHECK-LABEL: back_to_back_invokes