forked from OSchip/llvm-project
Fix assembly after adding entry points
Summary: When a given function B, located after function A, references one of A's basic blocks, it registers a new global symbol at the reference address and update A's Labels vector via BinaryFunction::addEntryPoint(). However, we don't update A's branch targets at this point. So we end up with an inconsistent CFG, where the basic block names are global symbols, but the internal branch operands are still referencing the old local name of the corresponding blocks that got promoted to an entry point. This patch fix this by detecting this situation in addEntryPoint and iterating over all instructions, looking for references to the old symbol and replacing them to use the new global symbol (since this is now an entry point). Fixes facebookincubator/BOLT#26 (cherry picked from FBD8728407)
This commit is contained in:
parent
544d1577c1
commit
12380b8b06
|
@ -1928,6 +1928,20 @@ void BinaryFunction::removeTagsFromProfile() {
|
|||
}
|
||||
}
|
||||
|
||||
void BinaryFunction::updateReferences(const MCSymbol *From, const MCSymbol *To) {
|
||||
assert(CurrentState == State::Empty || CurrentState == State::Disassembled);
|
||||
assert(From && To && "invalid symbols");
|
||||
|
||||
for (auto I = Instructions.begin(), E = Instructions.end(); I != E; ++I) {
|
||||
auto &Inst = I->second;
|
||||
for (int I = 0, E = MCPlus::getNumPrimeOperands(Inst); I != E; ++I) {
|
||||
const MCSymbol *S = BC.MIB->getTargetSymbol(Inst, I);
|
||||
if (S == From)
|
||||
BC.MIB->setOperandToSymbolRef(Inst, I, To, 0, &*BC.Ctx, 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void BinaryFunction::addEntryPoint(uint64_t Address) {
|
||||
assert(containsAddress(Address) && "address does not belong to the function");
|
||||
|
||||
|
@ -1943,6 +1957,8 @@ void BinaryFunction::addEntryPoint(uint64_t Address) {
|
|||
// If we haven't built CFG for the function, we can add a new entry point
|
||||
// even if it doesn't have an associated entry in the symbol table.
|
||||
if (CurrentState == State::Empty || CurrentState == State::Disassembled) {
|
||||
auto Iter = Labels.find(Offset);
|
||||
const MCSymbol *OldSym = Iter != Labels.end() ? Iter->second : nullptr;
|
||||
if (!EntrySymbol) {
|
||||
DEBUG(dbgs() << "creating local label\n");
|
||||
EntrySymbol = getOrCreateLocalLabel(Address);
|
||||
|
@ -1951,6 +1967,9 @@ void BinaryFunction::addEntryPoint(uint64_t Address) {
|
|||
}
|
||||
addEntryPointAtOffset(Address - getAddress());
|
||||
Labels.emplace(Offset, EntrySymbol);
|
||||
if (OldSym != nullptr && EntrySymbol != OldSym) {
|
||||
updateReferences(OldSym, EntrySymbol);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
|
@ -639,6 +639,10 @@ private:
|
|||
return getOrCreateLocalLabel(getAddress() + Offset);
|
||||
}
|
||||
|
||||
/// Update all \p From references in the code to refer to \p To. Used
|
||||
/// in disassembled state only.
|
||||
void updateReferences(const MCSymbol *From, const MCSymbol *To);
|
||||
|
||||
/// This is called in disassembled state.
|
||||
void addEntryPoint(uint64_t Address);
|
||||
|
||||
|
|
|
@ -439,3 +439,22 @@ MCPlusBuilder::getRegSize(MCPhysReg Reg) const {
|
|||
|
||||
return SizeMap[Reg];
|
||||
}
|
||||
|
||||
bool MCPlusBuilder::setOperandToSymbolRef(MCInst &Inst, int OpNum,
|
||||
const MCSymbol *Symbol,
|
||||
int64_t Addend, MCContext *Ctx,
|
||||
uint64_t RelType) const {
|
||||
MCOperand Operand;
|
||||
if (!Addend) {
|
||||
Operand = MCOperand::createExpr(getTargetExprFor(
|
||||
Inst, MCSymbolRefExpr::create(Symbol, *Ctx), *Ctx, RelType));
|
||||
} else {
|
||||
Operand = MCOperand::createExpr(getTargetExprFor(
|
||||
Inst,
|
||||
MCBinaryExpr::createAdd(MCSymbolRefExpr::create(Symbol, *Ctx),
|
||||
MCConstantExpr::create(Addend, *Ctx), *Ctx),
|
||||
*Ctx, RelType));
|
||||
}
|
||||
Inst.getOperand(OpNum) = Operand;
|
||||
return true;
|
||||
}
|
||||
|
|
|
@ -824,6 +824,12 @@ public:
|
|||
return false;
|
||||
}
|
||||
|
||||
/// Discard operand \p OpNum replacing it by a new MCOperand that is a
|
||||
/// MCExpr referencing \p Symbol + \p Addend.
|
||||
virtual bool setOperandToSymbolRef(MCInst &Inst, int OpNum,
|
||||
const MCSymbol *Symbol, int64_t Addend,
|
||||
MCContext *Ctx, uint64_t RelType) const;
|
||||
|
||||
/// Replace an immediate operand in the instruction \p Inst with a reference
|
||||
/// of the passed \p Symbol plus \p Addend. If the instruction does not have
|
||||
/// an immediate operand or has more than one - then return false. Otherwise
|
||||
|
|
|
@ -968,24 +968,6 @@ public:
|
|||
return true;
|
||||
}
|
||||
|
||||
bool setOperandToSymbolRef(MCInst &Inst, int OpNum, MCSymbol *Symbol,
|
||||
int64_t Addend, MCContext *Ctx,
|
||||
uint64_t RelType) const {
|
||||
MCOperand Operand;
|
||||
if (!Addend) {
|
||||
Operand = MCOperand::createExpr(getTargetExprFor(
|
||||
Inst, MCSymbolRefExpr::create(Symbol, *Ctx), *Ctx, RelType));
|
||||
} else {
|
||||
Operand = MCOperand::createExpr(getTargetExprFor(
|
||||
Inst,
|
||||
MCBinaryExpr::createAdd(MCSymbolRefExpr::create(Symbol, *Ctx),
|
||||
MCConstantExpr::create(Addend, *Ctx), *Ctx),
|
||||
*Ctx, RelType));
|
||||
}
|
||||
Inst.getOperand(OpNum) = Operand;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool replaceImmWithSymbol(MCInst &Inst, MCSymbol *Symbol, int64_t Addend,
|
||||
MCContext *Ctx, int64_t &Value,
|
||||
uint64_t RelType) const override {
|
||||
|
|
|
@ -2770,18 +2770,8 @@ public:
|
|||
|
||||
Value = Inst.getOperand(ImmOpNo).getImm();
|
||||
|
||||
MCOperand Operand;
|
||||
if (!Addend) {
|
||||
Operand = MCOperand::createExpr(getTargetExprFor(
|
||||
Inst, MCSymbolRefExpr::create(Symbol, *Ctx), *Ctx, RelType));
|
||||
} else {
|
||||
Operand = MCOperand::createExpr(getTargetExprFor(
|
||||
Inst,
|
||||
MCBinaryExpr::createAdd(MCSymbolRefExpr::create(Symbol, *Ctx),
|
||||
MCConstantExpr::create(Addend, *Ctx), *Ctx),
|
||||
*Ctx, RelType));
|
||||
}
|
||||
Inst.getOperand(ImmOpNo) = Operand;
|
||||
setOperandToSymbolRef(Inst, ImmOpNo, Symbol, Addend, Ctx, RelType);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,76 @@
|
|||
--- !ELF
|
||||
FileHeader:
|
||||
Class: ELFCLASS64
|
||||
Data: ELFDATA2LSB
|
||||
Type: ET_EXEC
|
||||
Machine: EM_X86_64
|
||||
Entry: 0x00000000004004FA
|
||||
Sections:
|
||||
- Name: .text
|
||||
Type: SHT_PROGBITS
|
||||
Flags: [ SHF_ALLOC, SHF_EXECINSTR ]
|
||||
Address: 0x00000000004003E0
|
||||
AddressAlign: 0x0000000000000010
|
||||
Content: 31ED4989D15E4889E24883E4F0505449C7C07005400048C7C10005400048C7C7FA044000E8B7FFFFFFF4660F1F440000B82F10600055482D281060004883F80E4889E577025DC3B8000000004885C074F45DBF28106000FFE00F1F8000000000B82810600055482D2810600048C1F8034889E54889C248C1EA3F4801D048D1F875025DC3BA000000004885D274F45D4889C6BF28106000FFE20F1F8000000000803D9D0B2000007511554889E5E87EFFFFFF5DC6058A0B200001F3C30F1F400048833D7809200000741EB8000000004885C0741455BF200E60004889E5FFD05DE97BFFFFFF0F1F00E973FFFFFF648B0425B0FCFFFF39C70F850C0000004839160F850400000048890EC3B8FFFFFFFFC34839FE0F84F0FFFFFFC34831C0C3669041574189FF41564989F641554989D541544C8D25F808200055488D2DF8082000534C29E531DB48C1FD034883EC08E85DFEFFFF4885ED741E0F1F8400000000004C89EA4C89F64489FF41FF14DC4883C3014839EB75EA4883C4085B5D415C415D415E415FC390662E0F1F840000000000F3C3
|
||||
- Name: .rela.text
|
||||
Type: SHT_RELA
|
||||
Flags: [ SHF_INFO_LINK ]
|
||||
Link: .symtab
|
||||
AddressAlign: 0x0000000000000008
|
||||
Info: .text
|
||||
Relocations:
|
||||
- Name: .rodata
|
||||
Type: SHT_PROGBITS
|
||||
Flags: [ SHF_ALLOC ]
|
||||
Address: 0x0000000000400580
|
||||
AddressAlign: 0x0000000000000008
|
||||
Content: '01000200000000000000000000000000'
|
||||
- Name: .dynamic
|
||||
Type: SHT_DYNAMIC
|
||||
Flags: [ SHF_WRITE, SHF_ALLOC ]
|
||||
Address: 0x0000000000600E28
|
||||
Link: .dynstr
|
||||
AddressAlign: 0x0000000000000008
|
||||
Content: 010000000000000001000000000000000C0000000000000090034000000000000D0000000000000074054000000000001900000000000000100E6000000000001B0000000000000008000000000000001A00000000000000180E6000000000001C000000000000000800000000000000F5FEFF6F000000009802400000000000050000000000000000034000000000000600000000000000B8024000000000000A0000000000000038000000000000000B0000000000000018000000000000001500000000000000000000000000000003000000000000000010600000000000020000000000000018000000000000001400000000000000070000000000000017000000000000007803400000000000070000000000000060034000000000000800000000000000180000000000000009000000000000001800000000000000FEFFFF6F000000004003400000000000FFFFFF6F000000000100000000000000F0FFFF6F000000003803400000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
|
||||
Symbols:
|
||||
Global:
|
||||
- Name: FUNC
|
||||
Type: STT_FUNC
|
||||
Section: .text
|
||||
Value: 0x00000000004004F0
|
||||
Size: 0x000000000000000A
|
||||
- Name: main
|
||||
Type: STT_FUNC
|
||||
Section: .text
|
||||
Value: 0x00000000004004FA
|
||||
Size: 0x0000000000000004
|
||||
- Name: XYZ
|
||||
Type: STT_FUNC
|
||||
Section: .text
|
||||
Value: 0x00000000004004CD
|
||||
Size: 0x0000000000000023
|
||||
DynamicSymbols:
|
||||
Global:
|
||||
- Name: mydata
|
||||
Section: .rodata
|
||||
Value: 0x0000000000400100
|
||||
ProgramHeaders:
|
||||
- Type: PT_PHDR
|
||||
Flags: [ PF_X, PF_R ]
|
||||
VAddr: 0x00400000
|
||||
PAddr: 0x00400000
|
||||
Sections:
|
||||
- Section: .text
|
||||
- Type: PT_LOAD
|
||||
Flags: [ PF_X, PF_R ]
|
||||
VAddr: 0x00400000
|
||||
PAddr: 0x00400000
|
||||
Sections:
|
||||
- Section: .text
|
||||
- Type: PT_DYNAMIC
|
||||
Flags: [ PF_X, PF_R ]
|
||||
VAddr: 0x0061ADA8
|
||||
PAddr: 0x0064ADA8
|
||||
Sections:
|
||||
- Section: .dynamic
|
||||
...
|
|
@ -0,0 +1,9 @@
|
|||
# This reproduces issue 26 from our github repo
|
||||
|
||||
# RUN: yaml2obj %p/Inputs/issue26.yaml &> %t.exe
|
||||
# RUN: llvm-bolt %t.exe -relocs -print-cfg -o %t.out \
|
||||
# RUN: | FileCheck %s
|
||||
|
||||
CHECK-NOT: BOLT-WARNING: CFG invalid in XYZ @ .LBB0
|
||||
CHECK: Binary Function "XYZ"
|
||||
CHECK: 0000000a: jne FUNCat0x4004e9
|
Loading…
Reference in New Issue