forked from OSchip/llvm-project
[WinEH] Optimize WinEH state stores
32-bit x86 Windows targets use a linked-list of nodes allocated on the stack, referenced via thread-local storage. The personality routine interprets one of the fields in the node as a 'state number' which indicates where the personality routine should transfer control. State transitions are possible only before call-sites which may throw exceptions. Our previous scheme had us update the state number before all call-sites which may throw. Instead, we can try to minimize the number of times we need to store by reasoning about the nearest store which dominates the current call-site. If the last store agrees with the current call-site, then we know that the state-update is redundant and can be elided. This is largely straightforward: an RPO walk of the blocks allows us to correctly forward propagate the information when the function is a DAG. Currently, loops are not handled optimally and may trigger superfluous state stores. Differential Revision: http://reviews.llvm.org/D16763 llvm-svn: 261122
This commit is contained in:
parent
cef252ea4c
commit
7e5937b775
|
@ -15,14 +15,20 @@
|
|||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "X86.h"
|
||||
#include "llvm/ADT/PostOrderIterator.h"
|
||||
#include "llvm/Analysis/CFG.h"
|
||||
#include "llvm/Analysis/EHPersonalities.h"
|
||||
#include "llvm/CodeGen/MachineModuleInfo.h"
|
||||
#include "llvm/CodeGen/WinEHFuncInfo.h"
|
||||
#include "llvm/IR/CallSite.h"
|
||||
#include "llvm/IR/Function.h"
|
||||
#include "llvm/IR/Instructions.h"
|
||||
#include "llvm/IR/IntrinsicInst.h"
|
||||
#include "llvm/IR/IRBuilder.h"
|
||||
#include "llvm/IR/Module.h"
|
||||
#include "llvm/Pass.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
#include <deque>
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
|
@ -33,6 +39,8 @@ void initializeWinEHStatePassPass(PassRegistry &);
|
|||
}
|
||||
|
||||
namespace {
|
||||
const int OverdefinedState = INT_MIN;
|
||||
|
||||
class WinEHStatePass : public FunctionPass {
|
||||
public:
|
||||
static char ID; // Pass identification, replacement for typeid.
|
||||
|
@ -82,6 +90,8 @@ private:
|
|||
// Per-function state
|
||||
EHPersonality Personality = EHPersonality::Unknown;
|
||||
Function *PersonalityFn = nullptr;
|
||||
bool UseStackGuard = false;
|
||||
int ParentBaseState;
|
||||
|
||||
/// The stack allocation containing all EH data, including the link in the
|
||||
/// fs:00 chain and the current state.
|
||||
|
@ -170,6 +180,7 @@ bool WinEHStatePass::runOnFunction(Function &F) {
|
|||
// Reset per-function state.
|
||||
PersonalityFn = nullptr;
|
||||
Personality = EHPersonality::Unknown;
|
||||
UseStackGuard = false;
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -247,7 +258,6 @@ void WinEHStatePass::emitExceptionRegistrationRecord(Function *F) {
|
|||
// Struct type of RegNode. Used for GEPing.
|
||||
Type *RegNodeTy;
|
||||
|
||||
StringRef PersonalityName = PersonalityFn->getName();
|
||||
IRBuilder<> Builder(&F->getEntryBlock(), F->getEntryBlock().begin());
|
||||
Type *Int8PtrType = Builder.getInt8PtrTy();
|
||||
if (Personality == EHPersonality::MSVC_CXX) {
|
||||
|
@ -259,7 +269,8 @@ void WinEHStatePass::emitExceptionRegistrationRecord(Function *F) {
|
|||
Builder.CreateStore(SP, Builder.CreateStructGEP(RegNodeTy, RegNode, 0));
|
||||
// TryLevel = -1
|
||||
StateFieldIndex = 2;
|
||||
insertStateNumberStore(&*Builder.GetInsertPoint(), -1);
|
||||
ParentBaseState = -1;
|
||||
insertStateNumberStore(&*Builder.GetInsertPoint(), ParentBaseState);
|
||||
// Handler = __ehhandler$F
|
||||
Function *Trampoline = generateLSDAInEAXThunk(F);
|
||||
Link = Builder.CreateStructGEP(RegNodeTy, RegNode, 1);
|
||||
|
@ -267,7 +278,6 @@ void WinEHStatePass::emitExceptionRegistrationRecord(Function *F) {
|
|||
} else if (Personality == EHPersonality::MSVC_X86SEH) {
|
||||
// If _except_handler4 is in use, some additional guard checks and prologue
|
||||
// stuff is required.
|
||||
bool UseStackGuard = (PersonalityName == "_except_handler4");
|
||||
RegNodeTy = getSEHRegistrationType();
|
||||
RegNode = Builder.CreateAlloca(RegNodeTy);
|
||||
// SavedESP = llvm.stacksave()
|
||||
|
@ -276,7 +286,10 @@ void WinEHStatePass::emitExceptionRegistrationRecord(Function *F) {
|
|||
Builder.CreateStore(SP, Builder.CreateStructGEP(RegNodeTy, RegNode, 0));
|
||||
// TryLevel = -2 / -1
|
||||
StateFieldIndex = 4;
|
||||
insertStateNumberStore(&*Builder.GetInsertPoint(), UseStackGuard ? -2 : -1);
|
||||
StringRef PersonalityName = PersonalityFn->getName();
|
||||
UseStackGuard = (PersonalityName == "_except_handler4");
|
||||
ParentBaseState = UseStackGuard ? -2 : -1;
|
||||
insertStateNumberStore(&*Builder.GetInsertPoint(), ParentBaseState);
|
||||
// ScopeTable = llvm.x86.seh.lsda(F)
|
||||
Value *FI8 = Builder.CreateBitCast(F, Int8PtrType);
|
||||
Value *LSDA = Builder.CreateCall(
|
||||
|
@ -388,6 +401,88 @@ void WinEHStatePass::unlinkExceptionRegistration(IRBuilder<> &Builder) {
|
|||
Builder.CreateStore(Next, FSZero);
|
||||
}
|
||||
|
||||
// Figure out what state we should assign calls in this block.
|
||||
static int getBaseStateForBB(DenseMap<BasicBlock *, ColorVector> &BlockColors,
|
||||
WinEHFuncInfo &FuncInfo, BasicBlock *BB) {
|
||||
int BaseState = -1;
|
||||
auto &BBColors = BlockColors[BB];
|
||||
|
||||
assert(BBColors.size() == 1 && "multi-color BB not removed by preparation");
|
||||
BasicBlock *FuncletEntryBB = BBColors.front();
|
||||
if (auto *FuncletPad =
|
||||
dyn_cast<FuncletPadInst>(FuncletEntryBB->getFirstNonPHI())) {
|
||||
auto BaseStateI = FuncInfo.FuncletBaseStateMap.find(FuncletPad);
|
||||
if (BaseStateI != FuncInfo.FuncletBaseStateMap.end())
|
||||
BaseState = BaseStateI->second;
|
||||
}
|
||||
|
||||
return BaseState;
|
||||
}
|
||||
|
||||
// Calculate the state a call-site is in.
|
||||
static int getStateForCallSite(DenseMap<BasicBlock *, ColorVector> &BlockColors,
|
||||
WinEHFuncInfo &FuncInfo, CallSite CS) {
|
||||
if (auto *II = dyn_cast<InvokeInst>(CS.getInstruction())) {
|
||||
// Look up the state number of the EH pad this unwinds to.
|
||||
assert(FuncInfo.InvokeStateMap.count(II) && "invoke has no state!");
|
||||
return FuncInfo.InvokeStateMap[II];
|
||||
}
|
||||
// Possibly throwing call instructions have no actions to take after
|
||||
// an unwind. Ensure they are in the -1 state.
|
||||
return getBaseStateForBB(BlockColors, FuncInfo, CS.getParent());
|
||||
}
|
||||
|
||||
// Calculate the intersection of all the FinalStates for a BasicBlock's
|
||||
// predecessor.
|
||||
static int getPredState(DenseMap<BasicBlock *, int> &FinalStates, Function &F,
|
||||
int ParentBaseState, BasicBlock *BB) {
|
||||
// The entry block has no predecessors but we know that the prologue always
|
||||
// sets us up with a fixed state.
|
||||
if (&F.getEntryBlock() == BB)
|
||||
return ParentBaseState;
|
||||
|
||||
// This is an EH Pad, conservatively report this basic block as overdefined.
|
||||
if (BB->isEHPad())
|
||||
return OverdefinedState;
|
||||
|
||||
int CommonState = OverdefinedState;
|
||||
for (BasicBlock *PredBB : predecessors(BB)) {
|
||||
// We didn't manage to get a state for one of these predecessors,
|
||||
// conservatively report this basic block as overdefined.
|
||||
auto PredEndState = FinalStates.find(PredBB);
|
||||
if (PredEndState == FinalStates.end())
|
||||
return OverdefinedState;
|
||||
|
||||
// This code is reachable via exceptional control flow,
|
||||
// conservatively report this basic block as overdefined.
|
||||
if (isa<CatchReturnInst>(PredBB->getTerminator()))
|
||||
return OverdefinedState;
|
||||
|
||||
int PredState = PredEndState->second;
|
||||
assert(PredState != OverdefinedState &&
|
||||
"overdefined BBs shouldn't be in FinalStates");
|
||||
if (CommonState == OverdefinedState)
|
||||
CommonState = PredState;
|
||||
|
||||
// At least two predecessors have different FinalStates,
|
||||
// conservatively report this basic block as overdefined.
|
||||
if (CommonState != PredState)
|
||||
return OverdefinedState;
|
||||
}
|
||||
|
||||
return CommonState;
|
||||
};
|
||||
|
||||
// Decide whether the instruction wrapped by CS requires the EH state number to
// be up to date when it executes.
//
// A null CallSite never needs a store. For personalities that
// isAsynchronousEHPersonality accepts, a store is needed unless the call-site
// is known not to access memory (presumably because any memory access may
// fault under such personalities -- confirm against the personality docs).
// For all other personalities a store is needed unless the call-site is known
// not to throw.
static bool isStateStoreNeeded(EHPersonality Personality, CallSite CS) {
  if (!CS)
    return false;

  const bool IsHarmless = isAsynchronousEHPersonality(Personality)
                              ? CS.doesNotAccessMemory()
                              : CS.doesNotThrow();
  return !IsHarmless;
}
|
||||
|
||||
void WinEHStatePass::addStateStores(Function &F, WinEHFuncInfo &FuncInfo) {
|
||||
// Mark the registration node. The backend needs to know which alloca it is so
|
||||
// that it can recover the original frame pointer.
|
||||
|
@ -405,38 +500,86 @@ void WinEHStatePass::addStateStores(Function &F, WinEHFuncInfo &FuncInfo) {
|
|||
|
||||
// Iterate all the instructions and emit state number stores.
|
||||
DenseMap<BasicBlock *, ColorVector> BlockColors = colorEHFunclets(F);
|
||||
for (BasicBlock &BB : F) {
|
||||
// Figure out what state we should assign calls in this block.
|
||||
int BaseState = -1;
|
||||
auto &BBColors = BlockColors[&BB];
|
||||
ReversePostOrderTraversal<Function *> RPOT(&F);
|
||||
|
||||
assert(BBColors.size() == 1 &&
|
||||
"multi-color BB not removed by preparation");
|
||||
BasicBlock *FuncletEntryBB = BBColors.front();
|
||||
if (auto *FuncletPad =
|
||||
dyn_cast<FuncletPadInst>(FuncletEntryBB->getFirstNonPHI())) {
|
||||
// We do not support nesting funclets within cleanuppads.
|
||||
if (isa<CleanupPadInst>(FuncletPad))
|
||||
// InitialStates yields the state of the first call-site for a BasicBlock.
|
||||
DenseMap<BasicBlock *, int> InitialStates;
|
||||
// FinalStates yields the state of the last call-site for a BasicBlock.
|
||||
DenseMap<BasicBlock *, int> FinalStates;
|
||||
// Worklist used to revisit BasicBlocks with indeterminate
|
||||
// Initial/Final-States.
|
||||
std::deque<BasicBlock *> Worklist;
|
||||
// Fill in InitialStates and FinalStates for BasicBlocks with call-sites.
|
||||
for (BasicBlock *BB : RPOT) {
|
||||
int InitialState = OverdefinedState;
|
||||
int FinalState;
|
||||
if (&F.getEntryBlock() == BB)
|
||||
InitialState = FinalState = ParentBaseState;
|
||||
for (Instruction &I : *BB) {
|
||||
CallSite CS(&I);
|
||||
if (!isStateStoreNeeded(Personality, CS))
|
||||
continue;
|
||||
|
||||
auto BaseStateI = FuncInfo.FuncletBaseStateMap.find(FuncletPad);
|
||||
if (BaseStateI != FuncInfo.FuncletBaseStateMap.end())
|
||||
BaseState = BaseStateI->second;
|
||||
int State = getStateForCallSite(BlockColors, FuncInfo, CS);
|
||||
if (InitialState == OverdefinedState)
|
||||
InitialState = State;
|
||||
FinalState = State;
|
||||
}
|
||||
// No call-sites in this basic block? That's OK, we will come back to these
|
||||
// in a later pass.
|
||||
if (InitialState == OverdefinedState) {
|
||||
Worklist.push_back(BB);
|
||||
continue;
|
||||
}
|
||||
DEBUG(dbgs() << "X86WinEHState: " << BB->getName()
|
||||
<< " InitialState=" << InitialState << '\n');
|
||||
DEBUG(dbgs() << "X86WinEHState: " << BB->getName()
|
||||
<< " FinalState=" << FinalState << '\n');
|
||||
InitialStates.insert({BB, InitialState});
|
||||
FinalStates.insert({BB, FinalState});
|
||||
}
|
||||
|
||||
for (Instruction &I : BB) {
|
||||
if (auto *CI = dyn_cast<CallInst>(&I)) {
|
||||
// Possibly throwing call instructions have no actions to take after
|
||||
// an unwind. Ensure they are in the -1 state.
|
||||
if (CI->doesNotThrow())
|
||||
continue;
|
||||
insertStateNumberStore(CI, BaseState);
|
||||
} else if (auto *II = dyn_cast<InvokeInst>(&I)) {
|
||||
// Look up the state number of the landingpad this unwinds to.
|
||||
assert(FuncInfo.InvokeStateMap.count(II) && "invoke has no state!");
|
||||
int State = FuncInfo.InvokeStateMap[II];
|
||||
insertStateNumberStore(II, State);
|
||||
}
|
||||
// Try to fill-in InitialStates and FinalStates which have no call-sites.
|
||||
while (!Worklist.empty()) {
|
||||
BasicBlock *BB = Worklist.front();
|
||||
Worklist.pop_front();
|
||||
// This BasicBlock has already been figured out, nothing more we can do.
|
||||
if (InitialStates.count(BB) != 0)
|
||||
continue;
|
||||
|
||||
int PredState = getPredState(FinalStates, F, ParentBaseState, BB);
|
||||
if (PredState == OverdefinedState)
|
||||
continue;
|
||||
|
||||
// We successfully inferred this BasicBlock's state via its predecessors;
|
||||
// enqueue its successors to see if we can infer their states.
|
||||
InitialStates.insert({BB, PredState});
|
||||
FinalStates.insert({BB, PredState});
|
||||
for (BasicBlock *SuccBB : successors(BB))
|
||||
Worklist.push_back(SuccBB);
|
||||
}
|
||||
|
||||
// Finally, insert state stores before call-sites which transition us to a new
|
||||
// state.
|
||||
for (BasicBlock *BB : RPOT) {
|
||||
auto &BBColors = BlockColors[BB];
|
||||
BasicBlock *FuncletEntryBB = BBColors.front();
|
||||
if (isa<CleanupPadInst>(FuncletEntryBB->getFirstNonPHI()))
|
||||
continue;
|
||||
|
||||
int PrevState = getPredState(FinalStates, F, ParentBaseState, BB);
|
||||
DEBUG(dbgs() << "X86WinEHState: " << BB->getName()
|
||||
<< " PrevState=" << PrevState << '\n');
|
||||
|
||||
for (Instruction &I : *BB) {
|
||||
CallSite CS(&I);
|
||||
if (!isStateStoreNeeded(Personality, CS))
|
||||
continue;
|
||||
|
||||
int State = getStateForCallSite(BlockColors, FuncInfo, CS);
|
||||
if (State != PrevState)
|
||||
insertStateNumberStore(&I, State);
|
||||
PrevState = State;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -28,7 +28,11 @@ entry:
|
|||
; CHECK: entry:
|
||||
; CHECK: store i32 -1
|
||||
; CHECK: call void @g(i32 3)
|
||||
; CHECK-NEXT: call void @g(i32 4)
|
||||
; CHECK-NEXT: call void @g(i32 5)
|
||||
call void @g(i32 3)
|
||||
call void @g(i32 4)
|
||||
call void @g(i32 5)
|
||||
store i32 0, i32* %tmp, align 4
|
||||
%0 = bitcast i32* %tmp to i8*
|
||||
; CHECK: store i32 0
|
||||
|
@ -54,14 +58,22 @@ catch.3: ; preds = %catch.dispatch.1
|
|||
; CHECK: catch.3:
|
||||
; CHECK: store i32 3
|
||||
; CHECK: call void @g(i32 1)
|
||||
; CHECK-NEXT: call void @g(i32 2)
|
||||
; CHECK-NEXT: call void @g(i32 3)
|
||||
call void @g(i32 1)
|
||||
call void @g(i32 2)
|
||||
call void @g(i32 3)
|
||||
catchret from %2 to label %try.cont
|
||||
|
||||
try.cont: ; preds = %catch.3
|
||||
; CHECK: try.cont:
|
||||
; CHECK: store i32 1
|
||||
; CHECK: call void @g(i32 2)
|
||||
; CHECK-NEXT: call void @g(i32 3)
|
||||
; CHECK-NEXT: call void @g(i32 4)
|
||||
call void @g(i32 2)
|
||||
call void @g(i32 3)
|
||||
call void @g(i32 4)
|
||||
unreachable
|
||||
|
||||
unreachable: ; preds = %catch
|
||||
|
@ -111,6 +123,10 @@ try.cont: ; preds = %catch2
|
|||
; CHECK: try.cont:
|
||||
; CHECK: store i32 1
|
||||
; CHECK: call void @dtor()
|
||||
; CHECK-NEXT: call void @dtor()
|
||||
; CHECK-NEXT: call void @dtor()
|
||||
call void @dtor() #3 [ "funclet"(token %1) ]
|
||||
call void @dtor() #3 [ "funclet"(token %1) ]
|
||||
call void @dtor() #3 [ "funclet"(token %1) ]
|
||||
catchret from %1 to label %try.cont4
|
||||
|
||||
|
@ -131,6 +147,52 @@ unreachable1: ; preds = %catch
|
|||
unreachable
|
||||
}
|
||||
|
||||
; Test that a state store is still emitted where it cannot be elided.
; %if.end is reached from %entry, from %if.then (whose invoke is checked to run
; after a store of 0) and from %__except.ret through a catchret, so the CHECK
; lines require a store of -1 immediately before the call to @dtor.
; CHECK-LABEL: define void @required_state_store(
|
||||
define void @required_state_store(i1 zeroext %cond) personality i32 (...)* @_except_handler3 {
|
||||
entry:
|
||||
%__exception_code = alloca i32, align 4
|
||||
call void (...) @llvm.localescape(i32* nonnull %__exception_code)
|
||||
; The entry block is expected to contain a store of -1 ahead of the first call.
; CHECK: store i32 -1
|
||||
; CHECK: call void @g(i32 0)
|
||||
call void @g(i32 0)
|
||||
br i1 %cond, label %if.then, label %if.end
|
||||
|
||||
if.then: ; preds = %entry
|
||||
; The invoke unwinds to %catch.dispatch; a store of 0 must directly precede it.
; CHECK: store i32 0
|
||||
; CHECK-NEXT: invoke void @g(i32 1)
|
||||
invoke void @g(i32 1)
|
||||
to label %if.end unwind label %catch.dispatch
|
||||
|
||||
catch.dispatch: ; preds = %if.then
|
||||
%0 = catchswitch within none [label %__except.ret] unwind to caller
|
||||
|
||||
__except.ret: ; preds = %catch.dispatch
|
||||
%1 = catchpad within %0 [i8* bitcast (i32 ()* @"\01?filt$0@0@required_state_store@@" to i8*)]
|
||||
catchret from %1 to label %if.end
|
||||
|
||||
if.end: ; preds = %if.then, %__except.ret, %entry
|
||||
; The predecessors do not agree on a state, so the store of -1 is required here.
; CHECK: store i32 -1
|
||||
; CHECK-NEXT: call void @dtor()
|
||||
call void @dtor()
|
||||
ret void
|
||||
}
|
||||
|
||||
; Filter function referenced by the catchpad in @required_state_store.
; It locates the parent's escaped %__exception_code slot, copies an i32 into it
; that is loaded through a pointer found at a fixed offset from the frame
; address, and always returns 1.
define internal i32 @"\01?filt$0@0@required_state_store@@"() {
|
||||
entry:
|
||||
; Recover the frame of @required_state_store and the slot it escaped at index 0.
%0 = tail call i8* @llvm.frameaddress(i32 1)
|
||||
%1 = tail call i8* @llvm.x86.seh.recoverfp(i8* bitcast (void (i1)* @required_state_store to i8*), i8* %0)
|
||||
%2 = tail call i8* @llvm.localrecover(i8* bitcast (void (i1)* @required_state_store to i8*), i8* %1, i32 0)
|
||||
%__exception_code = bitcast i8* %2 to i32*
|
||||
; Load the { i32*, i8* } pointer stored 20 bytes below the frame address
; (presumably the exception pointers record -- confirm), then the i32 that its
; first field points to.
%3 = getelementptr inbounds i8, i8* %0, i32 -20
|
||||
%4 = bitcast i8* %3 to { i32*, i8* }**
|
||||
%5 = load { i32*, i8* }*, { i32*, i8* }** %4, align 4
|
||||
%6 = getelementptr inbounds { i32*, i8* }, { i32*, i8* }* %5, i32 0, i32 0
|
||||
%7 = load i32*, i32** %6, align 4
|
||||
%8 = load i32, i32* %7, align 4
|
||||
; Publish the loaded value to the parent's local and return 1.
store i32 %8, i32* %__exception_code, align 4
|
||||
ret i32 1
|
||||
}
|
||||
|
||||
declare void @g(i32) #0
|
||||
|
||||
declare void @dtor()
|
||||
|
@ -139,6 +201,16 @@ declare x86_stdcallcc void @_CxxThrowException(i8*, %eh.ThrowInfo*)
|
|||
|
||||
declare i32 @__CxxFrameHandler3(...)
|
||||
|
||||
declare i8* @llvm.frameaddress(i32)
|
||||
|
||||
declare i8* @llvm.x86.seh.recoverfp(i8*, i8*)
|
||||
|
||||
declare i8* @llvm.localrecover(i8*, i8*, i32)
|
||||
|
||||
declare void @llvm.localescape(...)
|
||||
|
||||
declare i32 @_except_handler3(...)
|
||||
|
||||
attributes #0 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||||
attributes #1 = { noreturn }
|
||||
|
||||
|
|
|
@ -51,7 +51,7 @@ ehcleanup: ; preds = %entry
|
|||
; CHECK: "?dtor$2@?0?passes_two@4HA":
|
||||
; CHECK: pushl %ebp
|
||||
; CHECK: subl $8, %esp
|
||||
; CHECK: addl $12, %ebp
|
||||
; CHECK: addl $16, %ebp
|
||||
; CHECK: {{movl|leal}} -{{[0-9]+}}(%ebp), %ecx
|
||||
; CHECK: calll "??1A@@QAE@XZ"
|
||||
; CHECK: addl $8, %esp
|
||||
|
|
Loading…
Reference in New Issue