diff --git a/bolt/include/bolt/Core/BinaryContext.h b/bolt/include/bolt/Core/BinaryContext.h index f2fb7274d009..dbdd3d7c6d34 100644 --- a/bolt/include/bolt/Core/BinaryContext.h +++ b/bolt/include/bolt/Core/BinaryContext.h @@ -39,7 +39,6 @@ #include "llvm/Support/ErrorOr.h" #include "llvm/Support/raw_ostream.h" #include -#include #include #include #include @@ -642,10 +641,6 @@ public: /// special linux kernel sections std::unordered_map> LKMarkers; - /// List of external addresses in the code that are not a function start - /// and are referenced from BinaryFunction. - std::list> InterproceduralReferences; - /// PseudoProbe decoder MCPseudoProbeDecoder ProbeDecoder; @@ -889,23 +884,8 @@ public: bool registerFragment(BinaryFunction &TargetFunction, BinaryFunction &Function) const; - /// Add unterprocedural reference for \p Function to \p Address - void addInterproceduralReference(BinaryFunction *Function, uint64_t Address) { - InterproceduralReferences.push_back({Function, Address}); - } - - /// Used to fix the target of linker-generated AArch64 adrp + add - /// sequence with no relocation info. - void addAdrpAddRelocAArch64(BinaryFunction &BF, MCInst &LoadLowBits, - MCInst &LoadHiBits, uint64_t Target); - - /// Return true if AARch64 veneer was successfully matched at a given - /// \p Address and register veneer binary function if \p MatchOnly - /// argument is false. - bool handleAArch64Veneer(uint64_t Address, bool MatchOnly = false); - - /// Resolve inter-procedural dependencies from - void processInterproceduralReferences(); + /// Resolve inter-procedural dependencies from \p Function. + void processInterproceduralReferences(BinaryFunction &Function); /// Skip functions with all parent and child fragments transitively. void skipMarkedFragments(); diff --git a/bolt/include/bolt/Core/BinaryFunction.h b/bolt/include/bolt/Core/BinaryFunction.h index f7759818252e..3a84b26fbc8e 100644 --- a/bolt/include/bolt/Core/BinaryFunction.h +++ b/bolt/include/bolt/Core/BinaryFunction.h @@ -253,6 +253,10 @@ private: std::unique_ptr BLI; + /// Set of external addresses in the code that are not a function start + /// and are referenced from this function. + std::set InterproceduralReferences; + /// All labels in the function that are referenced via relocations from /// data objects. Typically these are jump table destinations and computed /// goto labels. diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h b/bolt/include/bolt/Core/MCPlusBuilder.h index 7cf846728ace..e9e6e6da8e96 100644 --- a/bolt/include/bolt/Core/MCPlusBuilder.h +++ b/bolt/include/bolt/Core/MCPlusBuilder.h @@ -1384,7 +1384,7 @@ public: llvm_unreachable("not implemented"); } - /// Return not 0 if the instruction CurInst, in combination with the recent + /// Return true if the instruction CurInst, in combination with the recent /// history of disassembled instructions supplied by [Begin, End), is a linker /// generated veneer/stub that needs patching. This happens in AArch64 when /// the code is large and the linker needs to generate stubs, but it does @@ -1394,14 +1394,11 @@ public: /// is put in TgtLowBits, and its pair in TgtHiBits. If the instruction in /// TgtHiBits does not have an immediate operand, but an expression, then /// this expression is put in TgtHiSym and Tgt only contains the lower bits. - /// Return value is a total number of instructions that were used to create - /// a veneer. - virtual uint64_t matchLinkerVeneer(InstructionIterator Begin, - InstructionIterator End, uint64_t Address, - const MCInst &CurInst, - MCInst *&TargetHiBits, - MCInst *&TargetLowBits, - uint64_t &Target) const { + virtual bool matchLinkerVeneer(InstructionIterator Begin, + InstructionIterator End, uint64_t Address, + const MCInst &CurInst, MCInst *&TargetHiBits, + MCInst *&TargetLowBits, + uint64_t &Target) const { llvm_unreachable("not implemented"); } diff --git a/bolt/lib/Core/BinaryContext.cpp b/bolt/lib/Core/BinaryContext.cpp index 97ca1671ed34..fa594cf35e9d 100644 --- a/bolt/lib/Core/BinaryContext.cpp +++ b/bolt/lib/Core/BinaryContext.cpp @@ -416,7 +416,7 @@ BinaryContext::handleAddressRef(uint64_t Address, BinaryFunction &BF, BF.addEntryPointAtOffset(Address - BF.getAddress()), Addend); } } else { - addInterproceduralReference(&BF, Address); + BF.InterproceduralReferences.insert(Address); } } @@ -1121,105 +1121,8 @@ bool BinaryContext::registerFragment(BinaryFunction &TargetFunction, return true; } -void BinaryContext::addAdrpAddRelocAArch64(BinaryFunction &BF, - MCInst &LoadLowBits, - MCInst &LoadHiBits, - uint64_t Target) { - const MCSymbol *TargetSymbol; - uint64_t Addend = 0; - std::tie(TargetSymbol, Addend) = handleAddressRef(Target, BF, - /*IsPCRel*/ true); - int64_t Val; - MIB->replaceImmWithSymbolRef(LoadHiBits, TargetSymbol, Addend, Ctx.get(), Val, - ELF::R_AARCH64_ADR_PREL_PG_HI21); - MIB->replaceImmWithSymbolRef(LoadLowBits, TargetSymbol, Addend, Ctx.get(), - Val, ELF::R_AARCH64_ADD_ABS_LO12_NC); -} - -bool BinaryContext::handleAArch64Veneer(uint64_t Address, bool MatchOnly) { - BinaryFunction *TargetFunction = getBinaryFunctionContainingAddress(Address); - if (TargetFunction) - return false; - - ErrorOr Section = getSectionForAddress(Address); - assert(Section && "cannot get section for referenced address"); - if (!Section->isText()) - return false; - - bool Ret = false; - StringRef SectionContents = Section->getContents(); - uint64_t Offset = Address - Section->getAddress(); - const uint64_t MaxSize = SectionContents.size() - Offset; - const uint8_t *Bytes = - reinterpret_cast(SectionContents.data()); - ArrayRef Data(Bytes + Offset, MaxSize); - - auto matchVeneer = [&](BinaryFunction::InstrMapType &Instructions, - MCInst &Instruction, uint64_t Offset, - uint64_t AbsoluteInstrAddr, - uint64_t TotalSize) -> bool { - MCInst *TargetHiBits, *TargetLowBits; - uint64_t TargetAddress, Count; - Count = MIB->matchLinkerVeneer(Instructions.begin(), Instructions.end(), - AbsoluteInstrAddr, Instruction, TargetHiBits, - TargetLowBits, TargetAddress); - if (!Count) - return false; - - if (MatchOnly) - return true; - - // NOTE The target symbol was created during disassemble's - // handleExternalReference - const MCSymbol *VeneerSymbol = getOrCreateGlobalSymbol(Address, "FUNCat"); - BinaryFunction *Veneer = createBinaryFunction(VeneerSymbol->getName().str(), - *Section, Address, TotalSize); - addAdrpAddRelocAArch64(*Veneer, *TargetLowBits, *TargetHiBits, - TargetAddress); - MIB->addAnnotation(Instruction, "AArch64Veneer", true); - Veneer->addInstruction(Offset, std::move(Instruction)); - --Count; - for (auto It = std::prev(Instructions.end()); Count != 0; - It = std::prev(It), --Count) { - MIB->addAnnotation(It->second, "AArch64Veneer", true); - Veneer->addInstruction(It->first, std::move(It->second)); - } - - Veneer->getOrCreateLocalLabel(Address); - Veneer->setMaxSize(TotalSize); - Veneer->updateState(BinaryFunction::State::Disassembled); - LLVM_DEBUG(dbgs() << "BOLT-DEBUG: handling veneer function at 0x" << Address - << "\n"); - return true; - }; - - uint64_t Size = 0, TotalSize = 0; - BinaryFunction::InstrMapType VeneerInstructions; - for (Offset = 0; Offset < MaxSize; Offset += Size) { - MCInst Instruction; - const uint64_t AbsoluteInstrAddr = Address + Offset; - if (!SymbolicDisAsm->getInstruction(Instruction, Size, Data.slice(Offset), - AbsoluteInstrAddr, nulls())) - break; - - TotalSize += Size; - if (MIB->isBranch(Instruction)) { - Ret = matchVeneer(VeneerInstructions, Instruction, Offset, - AbsoluteInstrAddr, TotalSize); - break; - } - - VeneerInstructions.emplace(Offset, std::move(Instruction)); - } - - return Ret; -} - -void BinaryContext::processInterproceduralReferences() { - for (const std::pair &It : - InterproceduralReferences) { - BinaryFunction &Function = *It.first; - uint64_t Address = It.second; +void BinaryContext::processInterproceduralReferences(BinaryFunction &Function) { + for (uint64_t Address : Function.InterproceduralReferences) { if (!Address) continue; @@ -1229,7 +1132,7 @@ void BinaryContext::processInterproceduralReferences() { continue; if (TargetFunction) { - if (TargetFunction->isFragment() && + if (TargetFunction->IsFragment && !registerFragment(*TargetFunction, Function)) { errs() << "BOLT-WARNING: interprocedural reference between unrelated " "fragments: " @@ -1255,10 +1158,6 @@ void BinaryContext::processInterproceduralReferences() { if (SectionName == ".plt" || SectionName == ".plt.got") continue; - // Check if it is aarch64 veneer written at Address - if (isAArch64() && handleAArch64Veneer(Address)) - continue; - if (opts::processAllFunctions()) { errs() << "BOLT-ERROR: cannot process binaries with unmarked " << "object in code at address 0x" << Twine::utohexstr(Address) @@ -1279,7 +1178,7 @@ void BinaryContext::processInterproceduralReferences() { } } - InterproceduralReferences.clear(); + clearList(Function.InterproceduralReferences); } void BinaryContext::postProcessSymbolTable() { diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp index 9b4403f9f56c..d490c4efc090 100644 --- a/bolt/lib/Core/BinaryFunction.cpp +++ b/bolt/lib/Core/BinaryFunction.cpp @@ -1061,12 +1061,27 @@ bool BinaryFunction::disassemble() { Instruction, Expr, *BC.Ctx, 0))); }; + // Used to fix the target of linker-generated AArch64 stubs with no relocation + // info + auto fixStubTarget = [&](MCInst &LoadLowBits, MCInst &LoadHiBits, + uint64_t Target) { + const MCSymbol *TargetSymbol; + uint64_t Addend = 0; + std::tie(TargetSymbol, Addend) = BC.handleAddressRef(Target, *this, true); + + int64_t Val; + MIB->replaceImmWithSymbolRef(LoadHiBits, TargetSymbol, Addend, Ctx.get(), + Val, ELF::R_AARCH64_ADR_PREL_PG_HI21); + MIB->replaceImmWithSymbolRef(LoadLowBits, TargetSymbol, Addend, Ctx.get(), + Val, ELF::R_AARCH64_ADD_ABS_LO12_NC); + }; + auto handleExternalReference = [&](MCInst &Instruction, uint64_t Size, uint64_t Offset, uint64_t TargetAddress, bool &IsCall) -> MCSymbol * { const uint64_t AbsoluteInstrAddr = getAddress() + Offset; MCSymbol *TargetSymbol = nullptr; - BC.addInterproceduralReference(this, TargetAddress); + InterproceduralReferences.insert(TargetAddress); if (opts::Verbosity >= 2 && !IsCall && Size == 2 && !BC.HasRelocations) { errs() << "BOLT-WARNING: relaxed tail call detected at 0x" << Twine::utohexstr(AbsoluteInstrAddr) << " in function " << *this @@ -1132,7 +1147,7 @@ bool BinaryFunction::disassemble() { HasFixedIndirectBranch = true; } else { MIB->convertJmpToTailCall(Instruction); - BC.addInterproceduralReference(this, IndirectTarget); + InterproceduralReferences.insert(IndirectTarget); } break; } @@ -1149,20 +1164,19 @@ bool BinaryFunction::disassemble() { auto handleAArch64IndirectCall = [&](MCInst &Instruction, uint64_t Offset) { const uint64_t AbsoluteInstrAddr = getAddress() + Offset; MCInst *TargetHiBits, *TargetLowBits; - uint64_t TargetAddress, Count; - Count = MIB->matchLinkerVeneer(Instructions.begin(), Instructions.end(), - AbsoluteInstrAddr, Instruction, TargetHiBits, - TargetLowBits, TargetAddress); - if (Count) { + uint64_t TargetAddress; + if (MIB->matchLinkerVeneer(Instructions.begin(), Instructions.end(), + AbsoluteInstrAddr, Instruction, TargetHiBits, + TargetLowBits, TargetAddress)) { MIB->addAnnotation(Instruction, "AArch64Veneer", true); - --Count; - for (auto It = std::prev(Instructions.end()); Count != 0; - It = std::prev(It), --Count) { + + uint8_t Counter = 0; + for (auto It = std::prev(Instructions.end()); Counter != 2; + --It, ++Counter) { MIB->addAnnotation(It->second, "AArch64Veneer", true); } - BC.addAdrpAddRelocAArch64(*this, *TargetLowBits, *TargetHiBits, - TargetAddress); + fixStubTarget(*TargetLowBits, *TargetHiBits, TargetAddress); } }; @@ -1281,9 +1295,7 @@ bool BinaryFunction::disassemble() { TargetSymbol = getOrCreateLocalLabel(TargetAddress); } else { if (TargetAddress == getAddress() + getSize() && - TargetAddress < getAddress() + getMaxSize() && - !(BC.isAArch64() && - BC.handleAArch64Veneer(TargetAddress, /*MatchOnly*/ true))) { + TargetAddress < getAddress() + getMaxSize()) { // Result of __builtin_unreachable(). LLVM_DEBUG(dbgs() << "BOLT-DEBUG: jump past end detected at 0x" << Twine::utohexstr(AbsoluteInstrAddr) @@ -2830,6 +2842,7 @@ void BinaryFunction::clearDisasmState() { clearList(Instructions); clearList(IgnoredBranches); clearList(TakenBranches); + clearList(InterproceduralReferences); if (BC.HasRelocations) { for (std::pair &LI : Labels) @@ -4403,20 +4416,17 @@ void BinaryFunction::printLoopInfo(raw_ostream &OS) const { } bool BinaryFunction::isAArch64Veneer() const { - if (empty()) + if (BasicBlocks.size() != 1) return false; BinaryBasicBlock &BB = **BasicBlocks.begin(); + if (BB.size() != 3) + return false; + for (MCInst &Inst : BB) if (!BC.MIB->hasAnnotation(Inst, "AArch64Veneer")) return false; - for (auto I = BasicBlocks.begin() + 1, E = BasicBlocks.end(); I != E; ++I) { - for (MCInst &Inst : **I) - if (!BC.MIB->isNoop(Inst)) - return false; - } - return true; } diff --git a/bolt/lib/Passes/BinaryPasses.cpp b/bolt/lib/Passes/BinaryPasses.cpp index 69adec4b5d24..304cf7c2f71e 100644 --- a/bolt/lib/Passes/BinaryPasses.cpp +++ b/bolt/lib/Passes/BinaryPasses.cpp @@ -1595,9 +1595,6 @@ void InstructionLowering::runOnFunctions(BinaryContext &BC) { } void StripRepRet::runOnFunctions(BinaryContext &BC) { - if (!BC.isX86()) - return; - uint64_t NumPrefixesRemoved = 0; uint64_t NumBytesSaved = 0; for (auto &BFI : BC.getBinaryFunctions()) { diff --git a/bolt/lib/Passes/VeneerElimination.cpp b/bolt/lib/Passes/VeneerElimination.cpp index 08294b14244b..dcfd553f729d 100644 --- a/bolt/lib/Passes/VeneerElimination.cpp +++ b/bolt/lib/Passes/VeneerElimination.cpp @@ -60,8 +60,8 @@ void VeneerElimination::runOnFunctions(BinaryContext &BC) { } } - outs() << "BOLT-INFO: number of removed linker-inserted veneers :" - << VeneersCount << "\n"; + LLVM_DEBUG(dbgs() << "BOLT-INFO: number of removed linker-inserted veneers :" + << VeneersCount << "\n"); // Handle veneers to veneers in case they occur for (auto entry : VeneerDestinations) { diff --git a/bolt/lib/Rewrite/BinaryPassManager.cpp b/bolt/lib/Rewrite/BinaryPassManager.cpp index 11e8113051ae..3230fad99df8 100644 --- a/bolt/lib/Rewrite/BinaryPassManager.cpp +++ b/bolt/lib/Rewrite/BinaryPassManager.cpp @@ -312,10 +312,6 @@ void BinaryFunctionPassManager::runAllPasses(BinaryContext &BC) { Manager.registerPass(std::make_unique(), opts::AsmDump.getNumOccurrences()); - if (BC.isAArch64()) - Manager.registerPass( - std::make_unique(PrintVeneerElimination)); - if (opts::Instrument) Manager.registerPass(std::make_unique(NeverPrint)); @@ -342,6 +338,10 @@ void BinaryFunctionPassManager::runAllPasses(BinaryContext &BC) { Manager.registerPass(std::make_unique(PrintICF), opts::ICF); + if (BC.isAArch64()) + Manager.registerPass( + std::make_unique(PrintVeneerElimination)); + Manager.registerPass( std::make_unique(NeverPrint, opts::SpecializeMemcpy1), !opts::SpecializeMemcpy1.empty()); diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp index ddc445834e8e..1047c2cbf864 100644 --- a/bolt/lib/Rewrite/RewriteInstance.cpp +++ b/bolt/lib/Rewrite/RewriteInstance.cpp @@ -2891,9 +2891,10 @@ void RewriteInstance::disassembleFunctions() { if (opts::PrintAll || opts::PrintDisasm) Function.print(outs(), "after disassembly", true); + + BC->processInterproceduralReferences(Function); } - BC->processInterproceduralReferences(); BC->clearJumpTableOffsets(); BC->populateJumpTables(); BC->skipMarkedFragments(); diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp index c736196a84ca..72b337217275 100644 --- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp +++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp @@ -1032,17 +1032,17 @@ public: /// ADD x16, x16, imm /// BR x16 /// - uint64_t matchLinkerVeneer(InstructionIterator Begin, InstructionIterator End, - uint64_t Address, const MCInst &CurInst, - MCInst *&TargetHiBits, MCInst *&TargetLowBits, - uint64_t &Target) const override { + bool matchLinkerVeneer(InstructionIterator Begin, InstructionIterator End, + uint64_t Address, const MCInst &CurInst, + MCInst *&TargetHiBits, MCInst *&TargetLowBits, + uint64_t &Target) const override { if (CurInst.getOpcode() != AArch64::BR || !CurInst.getOperand(0).isReg() || CurInst.getOperand(0).getReg() != AArch64::X16) - return 0; + return false; auto I = End; if (I == Begin) - return 0; + return false; --I; Address -= 4; @@ -1051,7 +1051,7 @@ public: !I->getOperand(1).isReg() || I->getOperand(0).getReg() != AArch64::X16 || I->getOperand(1).getReg() != AArch64::X16 || !I->getOperand(2).isImm()) - return 0; + return false; TargetLowBits = &*I; uint64_t Addr = I->getOperand(2).getImm() & 0xFFF; @@ -1060,12 +1060,12 @@ public: if (I->getOpcode() != AArch64::ADRP || MCPlus::getNumPrimeOperands(*I) < 2 || !I->getOperand(0).isReg() || !I->getOperand(1).isImm() || I->getOperand(0).getReg() != AArch64::X16) - return 0; + return false; TargetHiBits = &*I; Addr |= (Address + ((int64_t)I->getOperand(1).getImm() << 12)) & 0xFFFFFFFFFFFFF000ULL; Target = Addr; - return 3; + return true; } bool replaceImmWithSymbolRef(MCInst &Inst, const MCSymbol *Symbol, @@ -1107,6 +1107,8 @@ public: bool isPrefix(const MCInst &Inst) const override { return false; } + bool deleteREPPrefix(MCInst &Inst) const override { return false; } + bool createReturn(MCInst &Inst) const override { Inst.setOpcode(AArch64::RET); Inst.clear(); diff --git a/bolt/test/AArch64/veneer-gold.s b/bolt/test/AArch64/veneer-gold.s deleted file mode 100644 index d0227a93b1ff..000000000000 --- a/bolt/test/AArch64/veneer-gold.s +++ /dev/null @@ -1,75 +0,0 @@ -// This test checks that the gold linker style veneer are properly handled -// by BOLT. -// Strip .rela.mytext section to simulate inserted by a linker veneers -// that does not contain relocations. - -# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown \ -# RUN: %s -o %t.o -# RUN: %clang %cflags -fPIC -pie %t.o -o %t.exe \ -# RUN: -nostartfiles -nodefaultlibs -Wl,-z,notext \ -# RUN: -fuse-ld=lld -Wl,--no-relax -Wl,-q -# RUN: llvm-objcopy --remove-section .rela.mytext %t.exe -# RUN: llvm-objdump -d -j .mytext %t.exe | \ -# RUN: FileCheck --check-prefix=CHECKVENEER %s -# RUN: llvm-bolt %t.exe -o %t.bolt --elim-link-veneers=true \ -# RUN: --lite=0 --use-old-text=0 -# RUN: llvm-objdump -d -j .text %t.bolt | FileCheck %s - -.text -.balign 4 -.global dummy -.type dummy, %function -dummy: - adrp x1, dummy - ret -.size dummy, .-dummy - -.section ".mytext", "ax" -.balign 4 -.global foo -.type foo, %function -foo: -# CHECK: : -# CHECK-NEXT : {{.*}} bl {{.*}} - bl .L2 - ret -.size foo, .-foo - -.global foo2 -.type foo2, %function -foo2: -# CHECK: : -# CHECK-NEXT : {{.*}} bl {{.*}} - bl .L2 - ret -.size foo2, .-foo2 - -.global _start -.type _start, %function -_start: -# CHECK: <_start>: -# CHECK-NEXT: {{.*}} bl {{.*}} - bl .L1 - adr x0, .L0 - ret -.L0: - .xword 0 -.size _start, .-_start -.L1: -# CHECKVENEER: adrp -# CHECKVENEER-NEXT: add -# CHECKVENEER-NEXT: br -# CHECKVENEER-NEXT: nop - adrp x16, foo - add x16, x16, #:lo12:foo - br x16 - nop -.L2: -# CHECKVENEER-NEXT: adrp -# CHECKVENEER-NEXT: add -# CHECKVENEER-NEXT: br -# CHECKVENEER-NEXT: nop - adrp x16, foo2 - add x16, x16, #:lo12:foo2 - br x16 - nop diff --git a/bolt/test/AArch64/veneer.s b/bolt/test/AArch64/veneer.s deleted file mode 100644 index 5cf4a1420148..000000000000 --- a/bolt/test/AArch64/veneer.s +++ /dev/null @@ -1,38 +0,0 @@ -// This test checks that the veneer are properly handled by BOLT. - -# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown \ -# RUN: %s -o %t.o -# RUN: %clang %cflags -fPIC -pie %t.o -o %t.exe \ -# RUN: -nostartfiles -nodefaultlibs -Wl,-z,notext \ -# RUN: -fuse-ld=lld -Wl,--no-relax -# RUN: llvm-objdump -d --disassemble-symbols='myveneer' %t.exe | \ -# RUN: FileCheck --check-prefix=CHECKVENEER %s -# RUN: llvm-bolt %t.exe -o %t.bolt --elim-link-veneers=true --lite=0 -# RUN: llvm-objdump -d --disassemble-symbols='_start' %t.bolt | FileCheck %s - -.text -.balign 4 -.global foo -.type foo, %function -foo: - ret -.size foo, .-foo - -.global myveneer -.type myveneer, %function -myveneer: -# CHECKVENEER: adrp -# CHECKVENEER-NEXT: add - adrp x16, foo - add x16, x16, #:lo12:foo - br x16 - nop -.size myveneer, .-myveneer - -.global _start -.type _start, %function -_start: -# CHECK: {{.*}} bl {{.*}} - bl myveneer - ret -.size _start, .-_start