From 2f88a30ca660ca60b460a08d33eef3ab9517ee28 Mon Sep 17 00:00:00 2001 From: Heejin Ahn Date: Tue, 24 Aug 2021 20:40:21 -0700 Subject: [PATCH] [WebAssembly] Extract longjmp handling in EmSjLj to a function (NFC) Emscripten SjLj and (soon-to-be-added) Wasm SjLj transformation share many steps: 1. Initialize `setjmpTable` and `setjmpTableSize` in the entry BB 2. Handle `setjmp` callsites 3. Handle `longjmp` callsites 4. Cleanup and update SSA 1, 3, and 4 are identical for Emscripten SjLj and Wasm SjLj. Only the step 2 is different. This CL extracts the current Emscripten SjLj's longjmp callsites handling into a function. The reason to make this a separate CL is, without this, the diff tool cannot compare things well in the presence of moved code and added code in the followup Wasm SjLj CL, and it ends up mixing them together, making the diff unreadable. Also fixes some typos and variable names. So far we've been calling the buffer argument to `setjmp` and `longjmp` `jmpbuf`, but the name used in the man page for those functions is `env`, so updated them to be consistent. Reviewed By: tlively Differential Revision: https://reviews.llvm.org/D108728 --- llvm/lib/CodeGen/WasmEHPrepare.cpp | 4 +- .../WebAssemblyLowerEmscriptenEHSjLj.cpp | 192 ++++++++++-------- 2 files changed, 112 insertions(+), 84 deletions(-) diff --git a/llvm/lib/CodeGen/WasmEHPrepare.cpp b/llvm/lib/CodeGen/WasmEHPrepare.cpp index c4c84cd921fa..c04a7b28eff9 100644 --- a/llvm/lib/CodeGen/WasmEHPrepare.cpp +++ b/llvm/lib/CodeGen/WasmEHPrepare.cpp @@ -29,7 +29,7 @@ // __wasm_lpad_context.lpad_index = index; // __wasm_lpad_context.lsda = wasm.lsda(); // _Unwind_CallPersonality(exn); -// selector = __wasm.landingpad_context.selector; +// selector = __wasm_lpad_context.selector; // ... 
// // @@ -329,7 +329,7 @@ void WasmEHPrepare::prepareEHPad(BasicBlock *BB, bool NeedPersonality, OperandBundleDef("funclet", CPI)); PersCI->setDoesNotThrow(); - // Pseudocode: int selector = __wasm.landingpad_context.selector; + // Pseudocode: int selector = __wasm_lpad_context.selector; Instruction *Selector = IRB.CreateLoad(IRB.getInt32Ty(), SelectorField, "selector"); diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp index 3488ef42cfb9..263c5140f161 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp @@ -126,9 +126,9 @@ /// In case calls to longjmp() exists /// /// 1) Lower -/// longjmp(buf, value) +/// longjmp(env, val) /// into -/// emscripten_longjmp(buf, value) +/// emscripten_longjmp(env, val) /// /// In case calls to setjmp() exists /// @@ -141,9 +141,9 @@ /// Emscripten compiler-rt. /// /// 3) Lower -/// setjmp(buf) +/// setjmp(env) /// into -/// setjmpTable = saveSetjmp(buf, label, setjmpTable, setjmpTableSize); +/// setjmpTable = saveSetjmp(env, label, setjmpTable, setjmpTableSize); /// setjmpTableSize = getTempRet0(); /// For each dynamic setjmp call, setjmpTable stores its ID (a number which /// is incrementally assigned from 0) and its label (a unique number that @@ -151,7 +151,7 @@ /// setjmpTable, it is reallocated in saveSetjmp() in Emscripten's /// compiler-rt and it will return the new table address, and assign the new /// table size in setTempRet0(). saveSetjmp also stores the setjmp's ID into -/// the buffer buf. A BB with setjmp is split into two after setjmp call in +/// the buffer 'env'. A BB with setjmp is split into two after setjmp call in /// order to make the post-setjmp BB the possible destination of longjmp BB. 
/// /// @@ -251,8 +251,13 @@ class WebAssemblyLowerEmscriptenEHSjLj final : public ModulePass { return "WebAssembly Lower Emscripten Exceptions"; } + using InstVector = SmallVectorImpl; bool runEHOnFunction(Function &F); bool runSjLjOnFunction(Function &F); + void handleLongjmpableCallsForEmscriptenSjLj( + Function &F, InstVector &SetjmpTableInsts, + InstVector &SetjmpTableSizeInsts, + SmallVectorImpl &SetjmpRetPHIs); Function *getFindMatchingCatch(Module &M, unsigned NumClauses); Value *wrapInvoke(CallBase *CI); @@ -678,9 +683,9 @@ static void replaceLongjmpWithEmscriptenLongjmp(Function *LongjmpF, auto *CI = dyn_cast(U); if (CI && CI->getCalledFunction() == LongjmpF) { IRB.SetInsertPoint(CI); - Value *JmpBuf = - IRB.CreatePtrToInt(CI->getArgOperand(0), getAddrIntType(M), "jmpbuf"); - IRB.CreateCall(EmLongjmpF, {JmpBuf, CI->getArgOperand(1)}); + Value *Env = + IRB.CreatePtrToInt(CI->getArgOperand(0), getAddrIntType(M), "env"); + IRB.CreateCall(EmLongjmpF, {Env, CI->getArgOperand(1)}); ToErase.push_back(CI); } } @@ -1098,8 +1103,103 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runSjLjOnFunction(Function &F) { ToErase.push_back(CI); } - // Update each call that can longjmp so it can return to a setjmp where - // relevant. + // Handle longjmp calls. 
+ handleLongjmpableCallsForEmscriptenSjLj(F, SetjmpTableInsts, + SetjmpTableSizeInsts, SetjmpRetPHIs); + + // Erase everything we no longer need in this function + for (Instruction *I : ToErase) + I->eraseFromParent(); + + // Free setjmpTable buffer before each return instruction + function-exiting + // call + SmallVector ExitingInsts; + for (BasicBlock &BB : F) { + Instruction *TI = BB.getTerminator(); + if (isa(TI)) + ExitingInsts.push_back(TI); + for (auto &I : BB) { + if (auto *CB = dyn_cast(&I)) { + StringRef CalleeName = CB->getCalledOperand()->getName(); + if (CalleeName == "__resumeException" || + CalleeName == "emscripten_longjmp" || CalleeName == "__cxa_throw") + ExitingInsts.push_back(&I); + } + } + } + for (auto *I : ExitingInsts) { + DebugLoc DL = getOrCreateDebugLoc(I, F.getSubprogram()); + auto *Free = CallInst::CreateFree(SetjmpTable, I); + Free->setDebugLoc(DL); + // CallInst::CreateFree may create a bitcast instruction if its argument + // types mismatch. We need to set the debug loc for the bitcast too. + if (auto *FreeCallI = dyn_cast(Free)) { + if (auto *BitCastI = dyn_cast(FreeCallI->getArgOperand(0))) + BitCastI->setDebugLoc(DL); + } + } + + // Every call to saveSetjmp can change setjmpTable and setjmpTableSize + // (when buffer reallocation occurs) + // entry: + // setjmpTableSize = 4; + // setjmpTable = (int *) malloc(40); + // setjmpTable[0] = 0; + // ... + // somebb: + // setjmpTable = saveSetjmp(env, label, setjmpTable, setjmpTableSize); + // setjmpTableSize = getTempRet0(); + // So we need to make sure the SSA for these variables is valid so that every + // saveSetjmp and testSetjmp calls have the correct arguments. 
+ SSAUpdater SetjmpTableSSA; + SSAUpdater SetjmpTableSizeSSA; + SetjmpTableSSA.Initialize(Type::getInt32PtrTy(C), "setjmpTable"); + SetjmpTableSizeSSA.Initialize(Type::getInt32Ty(C), "setjmpTableSize"); + for (Instruction *I : SetjmpTableInsts) + SetjmpTableSSA.AddAvailableValue(I->getParent(), I); + for (Instruction *I : SetjmpTableSizeInsts) + SetjmpTableSizeSSA.AddAvailableValue(I->getParent(), I); + + for (auto &U : make_early_inc_range(SetjmpTable->uses())) + if (auto *I = dyn_cast(U.getUser())) + if (I->getParent() != Entry) + SetjmpTableSSA.RewriteUse(U); + for (auto &U : make_early_inc_range(SetjmpTableSize->uses())) + if (auto *I = dyn_cast(U.getUser())) + if (I->getParent() != Entry) + SetjmpTableSizeSSA.RewriteUse(U); + + // Finally, our modifications to the cfg can break dominance of SSA variables. + // For example, in this code, + // if (x()) { .. setjmp() .. } + // if (y()) { .. longjmp() .. } + // We must split the longjmp block, and it can jump into the block split + // from the setjmp one. But that means that when we split the setjmp block, its + // first part no longer dominates its second part - there is a theoretically + // possible control flow path where x() is false, then y() is true and we + // reach the second part of the setjmp block, without ever reaching the first + // part. So, we rebuild SSA form here. + rebuildSSA(F); + return true; +} + +// Update each call that can longjmp so it can return to a setjmp where +// relevant. +void WebAssemblyLowerEmscriptenEHSjLj::handleLongjmpableCallsForEmscriptenSjLj( + Function &F, InstVector &SetjmpTableInsts, InstVector &SetjmpTableSizeInsts, + SmallVectorImpl &SetjmpRetPHIs) { + Module &M = *F.getParent(); + LLVMContext &C = F.getContext(); + IRBuilder<> IRB(C); + SmallVector ToErase; + + // We need to pass setjmpTable and setjmpTableSize to testSetjmp function.
+ // These values are defined in the beginning of the function and also in each + // setjmp callsite, but we don't know which values we should use at this + // point. So here we arbitrarily use the ones defined in the beginning of the + // function, and SSAUpdater will later update them to the correct values. + Instruction *SetjmpTable = *SetjmpTableInsts.begin(); + Instruction *SetjmpTableSize = *SetjmpTableSizeInsts.begin(); // Because we are creating new BBs while processing and don't want to make // all these newly created BBs candidates again for longjmp processing, we @@ -1247,78 +1347,6 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runSjLjOnFunction(Function &F) { } } - // Erase everything we no longer need in this function for (Instruction *I : ToErase) I->eraseFromParent(); - - // Free setjmpTable buffer before each return instruction + function-exiting - // call - SmallVector ExitingInsts; - for (BasicBlock &BB : F) { - Instruction *TI = BB.getTerminator(); - if (isa(TI)) - ExitingInsts.push_back(TI); - for (auto &I : BB) { - if (auto *CB = dyn_cast(&I)) { - StringRef CalleeName = CB->getCalledOperand()->getName(); - if (CalleeName == "__resumeException" || - CalleeName == "emscripten_longjmp" || CalleeName == "__cxa_throw") - ExitingInsts.push_back(&I); - } - } - } - for (auto *I : ExitingInsts) { - DebugLoc DL = getOrCreateDebugLoc(I, F.getSubprogram()); - auto *Free = CallInst::CreateFree(SetjmpTable, I); - Free->setDebugLoc(DL); - // CallInst::CreateFree may create a bitcast instruction if its argument - // types mismatch. We need to set the debug loc for the bitcast too. - if (auto *FreeCallI = dyn_cast(Free)) { - if (auto *BitCastI = dyn_cast(FreeCallI->getArgOperand(0))) - BitCastI->setDebugLoc(DL); - } - } - - // Every call to saveSetjmp can change setjmpTable and setjmpTableSize - // (when buffer reallocation occurs) - // entry: - // setjmpTableSize = 4; - // setjmpTable = (int *) malloc(40); - // setjmpTable[0] = 0; - // ...
- // somebb: - // setjmpTable = saveSetjmp(buf, label, setjmpTable, setjmpTableSize); - // setjmpTableSize = getTempRet0(); - // So we need to make sure the SSA for these variables is valid so that every - // saveSetjmp and testSetjmp calls have the correct arguments. - SSAUpdater SetjmpTableSSA; - SSAUpdater SetjmpTableSizeSSA; - SetjmpTableSSA.Initialize(Type::getInt32PtrTy(C), "setjmpTable"); - SetjmpTableSizeSSA.Initialize(Type::getInt32Ty(C), "setjmpTableSize"); - for (Instruction *I : SetjmpTableInsts) - SetjmpTableSSA.AddAvailableValue(I->getParent(), I); - for (Instruction *I : SetjmpTableSizeInsts) - SetjmpTableSizeSSA.AddAvailableValue(I->getParent(), I); - - for (auto &U : make_early_inc_range(SetjmpTable->uses())) - if (auto *I = dyn_cast(U.getUser())) - if (I->getParent() != Entry) - SetjmpTableSSA.RewriteUse(U); - for (auto &U : make_early_inc_range(SetjmpTableSize->uses())) - if (auto *I = dyn_cast(U.getUser())) - if (I->getParent() != Entry) - SetjmpTableSizeSSA.RewriteUse(U); - - // Finally, our modifications to the cfg can break dominance of SSA variables. - // For example, in this code, - // if (x()) { .. setjmp() .. } - // if (y()) { .. longjmp() .. } - // We must split the longjmp block, and it can jump into the block splitted - // from setjmp one. But that means that when we split the setjmp block, it's - // first part no longer dominates its second part - there is a theoretically - // possible control flow path where x() is false, then y() is true and we - // reach the second part of the setjmp block, without ever reaching the first - // part. So, we rebuild SSA form here. - rebuildSSA(F); - return true; }