Add exception handling information to CFG.

Summary:
Read .gcc_except_table and add information to CFG. Calls have extra operands
indicating there's a possible handler for exceptions and an action. Landing
pad information is recorded in BinaryFunction.

Also convert JMP instructions that are actually calls into tail-call pseudo
instructions so that call-instruction analysis does not miss them.

(cherry picked from FBD2652775)
This commit is contained in:
Maksim Panchenko 2015-11-12 18:56:58 -08:00
parent 2117362a09
commit be2a19523c
4 changed files with 243 additions and 12 deletions

View File

@ -99,6 +99,22 @@ void BinaryFunction::print(raw_ostream &OS, std::string Annotation,
// Offset of the instruction in function.
uint64_t Offset{0};
// Print a single instruction at the current value of Offset, followed by
// call-specific annotations: a tail-call marker and, when present, the
// exception-handling operands (handler and action).
auto printInstruction = [&](const MCInst &Instruction) {
  OS << format(" %08" PRIx64 ": ", Offset);
  BC.InstPrinter->printInst(&Instruction, OS, "", *BC.STI);
  if (BC.MIA->isCall(Instruction)) {
    if (BC.MIA->isTailCall(Instruction))
      OS << " # TAILCALL ";
    // Operands 1 and 2 are both read below, so at least three operands must
    // be present; checking for "> 1" alone would allow an out-of-range access
    // to operand 2 on a two-operand call.
    // NOTE(review): this assumes a call has exactly one native operand ahead
    // of the handler/action pair - confirm for calls with more operands
    // (e.g. indirect calls through memory).
    if (Instruction.getNumOperands() > 2) {
      OS << " # handler: " << Instruction.getOperand(1);
      OS << "; action: " << Instruction.getOperand(2);
    }
  }
  OS << "\n";
  // In case we need MCInst printer:
  // Instr.dump_pretty(OS, InstructionPrinter.get());
};
if (BasicBlocks.empty() && !Instructions.empty()) {
// Print before CFG was built.
for (const auto &II : Instructions) {
@ -109,10 +125,7 @@ void BinaryFunction::print(raw_ostream &OS, std::string Annotation,
if (LI != Labels.end())
OS << LI->second->getName() << ":\n";
auto &Instruction = II.second;
OS << format(" %08" PRIx64 ": ", Offset);
BC.InstPrinter->printInst(&Instruction, OS, "", *BC.STI);
OS << "\n";
printInstruction(II.second);
}
}
@ -121,6 +134,10 @@ void BinaryFunction::print(raw_ostream &OS, std::string Annotation,
<< BB->Instructions.size() << " instructions, align : "
<< BB->getAlignment() << ")\n";
if (LandingPads.find(BB->getLabel()) != LandingPads.end()) {
OS << " Landing Pad\n";
}
uint64_t BBExecCount = BB->getExecutionCount();
if (BBExecCount != BinaryBasicBlock::COUNT_NO_PROFILE) {
OS << " Exec Count : " << BBExecCount << "\n";
@ -138,12 +155,7 @@ void BinaryFunction::print(raw_ostream &OS, std::string Annotation,
Offset = RoundUpToAlignment(Offset, BB->getAlignment());
for (auto &Instr : *BB) {
OS << format(" %08" PRIx64 ": ", Offset);
BC.InstPrinter->printInst(&Instr, OS, "", *BC.STI);
OS << "\n";
// In case we need MCInst printer:
// Instr.dump_pretty(OS, InstructionPrinter.get());
printInstruction(Instr);
// Calculate the size of the instruction.
// Note: this is imprecise since happening prior to relaxation.
@ -298,8 +310,14 @@ bool BinaryFunction::disassemble(ArrayRef<uint8_t> FunctionData) {
<< ". Code size will be increased.\n";
}
// This is a call regardless of the opcode (e.g. tail call).
IsCall = true;
// This is a call regardless of the opcode.
// Assign proper opcode for tail calls, so that they could be
// treated as calls.
if (!IsCall) {
MIA->convertJmpToTailCall(Instruction);
IsCall = true;
}
TargetSymbol = BC.getOrCreateGlobalSymbol(InstructionTarget,
"FUNCat");
}

View File

@ -119,6 +119,16 @@ private:
/// The profile data for the number of times the function was executed.
uint64_t ExecutionCount{COUNT_NO_PROFILE};
/// Binary blob representing action, type, and type index tables for this
/// function's LSDA (exception handling).
ArrayRef<uint8_t> LSDATables;
/// Original LSDA address for the function.
uint64_t LSDAAddress{0};
/// Landing pads for the function.
std::set<MCSymbol *> LandingPads;
/// Release storage used by instructions.
BinaryFunction &clearInstructions() {
InstrMapType TempMap;
@ -401,6 +411,12 @@ public:
return *this;
}
/// Record \p Address as the original LSDA address of this function.
/// Returns a reference to this object so that setters can be chained.
BinaryFunction &setLSDAAddress(uint64_t Address) {
  this->LSDAAddress = Address;
  return *this;
}
/// Return the profile information about the number of times
/// the function was executed.
///
@ -409,6 +425,11 @@ public:
return ExecutionCount;
}
/// Return the original LSDA address recorded for the function, or 0 if
/// no address has been set.
uint64_t getLSDAAddress() const { return LSDAAddress; }
/// Disassemble function from raw data \p FunctionData.
/// If successful, this function will populate the list of instructions
/// for this function together with offsets from the function start
@ -445,6 +466,9 @@ public:
/// adding jumps based on a new layout order.
void fixBranches();
/// Process LSDA information for the function.
void parseLSDA(ArrayRef<uint8_t> LSDAData, uint64_t LSDAAddress);
virtual ~BinaryFunction() {}
};

View File

@ -252,6 +252,183 @@ void readLSDA(ArrayRef<uint8_t> LSDAData, BinaryContext &BC) {
}
}
/// Process the LSDA (exception handling table) of this function.
///
/// \p LSDASectionData holds the contents of the section containing the LSDA
/// and \p LSDASectionAddress is the address that section starts at. The
/// function does nothing if no LSDA address was recorded for it.
///
/// For every call site record, the landing pad (if any) gets a label that is
/// added to LandingPads, and every call instruction inside the call site range
/// gets two extra operands: the landing pad symbol (or immediate 0 when there
/// is no landing pad) and the action entry. When opts::PrintExceptions is set,
/// the decoded tables are also printed to errs().
void BinaryFunction::parseLSDA(ArrayRef<uint8_t> LSDASectionData,
                               uint64_t LSDASectionAddress) {
  assert(CurrentState == State::Disassembled && "unexpecrted function state");

  if (!getLSDAAddress())
    return;

  // The LSDA must lie inside the supplied section; check both bounds so the
  // offset computed below cannot wrap around.
  assert(getLSDAAddress() >= LSDASectionAddress &&
         getLSDAAddress() < LSDASectionAddress + LSDASectionData.size() &&
         "wrong LSDA address");

  // Compute the offset as an integer first so that no out-of-range
  // intermediate pointer is ever formed.
  const uint8_t *Ptr =
      LSDASectionData.data() + (getLSDAAddress() - LSDASectionAddress);

  uint8_t LPStartEncoding = *Ptr++;
  uintptr_t LPStart = 0;
  if (LPStartEncoding != DW_EH_PE_omit) {
    LPStart = readEncodedPointer(Ptr, LPStartEncoding);
  }

  assert(LPStart == 0 && "support for split functions not implemented");

  uint8_t TTypeEncoding = *Ptr++;
  uintptr_t TTypeEnd = 0;
  if (TTypeEncoding != DW_EH_PE_omit) {
    TTypeEnd = readULEB128(Ptr);
  }

  if (opts::PrintExceptions) {
    errs() << "LPStart Encoding = " << static_cast<unsigned>(LPStartEncoding)
           << '\n';
    errs() << "LPStart = 0x" << Twine::utohexstr(LPStart) << '\n';
    errs() << "TType Encoding = " << static_cast<unsigned>(TTypeEncoding)
           << '\n';
    errs() << "TType End = " << TTypeEnd << '\n';
  }

  // Table to store list of indices in type table. Entries are uleb128s values.
  // Note: the base is taken from Ptr *before* the call site header is read.
  auto TypeIndexTableStart = Ptr + TTypeEnd;

  // Offset past the last decoded index.
  // NOTE(review): only written in this function; presumably kept for sizing
  // the LSDA tables later - confirm.
  intptr_t MaxTypeIndexTableOffset = 0;

  // The actual type info table starts at the same location, but grows in
  // different direction. Encoding is different too (TTypeEncoding).
  auto TypeTableStart = reinterpret_cast<const uint32_t *>(Ptr + TTypeEnd);

  // Print the type referenced by a (positive) type table index.
  // NOTE(review): the table is read through a uint32_t pointer; this assumes
  // the entries are suitably aligned - confirm.
  auto printType = [&](int Index, raw_ostream &OS) {
    assert(Index > 0 && "only positive indices are valid");
    assert(TTypeEncoding == DW_EH_PE_udata4 &&
           "only udata4 supported for TTypeEncoding");
    auto TypeAddress = *(TypeTableStart - Index);
    if (TypeAddress == 0) {
      OS << "<all>";
      return;
    }
    auto NI = BC.GlobalAddresses.find(TypeAddress);
    if (NI != BC.GlobalAddresses.end()) {
      OS << NI->second;
    } else {
      OS << "0x" << Twine::utohexstr(TypeAddress);
    }
  };

  uint8_t CallSiteEncoding = *Ptr++;
  uint32_t CallSiteTableLength = readULEB128(Ptr);
  const uint8_t *CallSiteTableStart = Ptr;
  const uint8_t *CallSiteTableEnd = CallSiteTableStart + CallSiteTableLength;
  const uint8_t *CallSitePtr = CallSiteTableStart;
  // The action table immediately follows the call site table.
  const uint8_t *ActionTableStart = CallSiteTableEnd;

  if (opts::PrintExceptions) {
    errs() << "CallSite Encoding = " << static_cast<unsigned>(CallSiteEncoding)
           << '\n';
    errs() << "CallSite table length = " << CallSiteTableLength << '\n';
    errs() << '\n';
  }

  unsigned NumCallSites = 0;
  while (CallSitePtr < CallSiteTableEnd) {
    ++NumCallSites;
    uintptr_t Start = readEncodedPointer(CallSitePtr, CallSiteEncoding);
    uintptr_t Length = readEncodedPointer(CallSitePtr, CallSiteEncoding);
    uintptr_t LandingPad = readEncodedPointer(CallSitePtr, CallSiteEncoding);
    uintptr_t ActionEntry = readULEB128(CallSitePtr);

    uint64_t RangeBase = getAddress();
    if (opts::PrintExceptions) {
      errs() << "Call Site: [0x" << Twine::utohexstr(RangeBase + Start)
             << ", 0x" << Twine::utohexstr(RangeBase + Start + Length)
             << "); landing pad: 0x" << Twine::utohexstr(LPStart + LandingPad)
             << "; action entry: 0x" << Twine::utohexstr(ActionEntry) << "\n";
    }

    // Create a handler entry if necessary.
    MCSymbol *LPSymbol{nullptr};
    if (LandingPad) {
      auto Label = Labels.find(LandingPad);
      if (Label != Labels.end()) {
        LPSymbol = Label->second;
      } else {
        LPSymbol = BC.Ctx->createTempSymbol("LP");
        Labels[LandingPad] = LPSymbol;
      }
      LandingPads.insert(LPSymbol);
    }

    // Mark all call instructions in the range.
    auto II = Instructions.find(Start);
    assert(II != Instructions.end() &&
           "exception range not pointing to instruction");
    do {
      auto &Instruction = II->second;
      if (BC.MIA->isCall(Instruction)) {
        if (LPSymbol) {
          Instruction.addOperand(MCOperand::createExpr(
              MCSymbolRefExpr::create(LPSymbol,
                                      MCSymbolRefExpr::VK_None,
                                      *BC.Ctx)));
        } else {
          Instruction.addOperand(MCOperand::createImm(0));
        }
        Instruction.addOperand(MCOperand::createImm(ActionEntry));
      }
      ++II;
      // Stop at the end of the instruction map: a range that covers the last
      // instruction of the function must not dereference the end iterator.
    } while (II != Instructions.end() && II->first < Start + Length);

    if (ActionEntry != 0) {
      if (opts::PrintExceptions)
        errs() << " actions: ";
      // ActionEntry is a 1-based byte offset into the action table.
      const uint8_t *ActionPtr = ActionTableStart + ActionEntry - 1;
      long long ActionType;
      long long ActionNext;
      auto Sep = "";
      do {
        ActionType = readSLEB128(ActionPtr);
        // The "next" displacement is relative to the position of the
        // displacement field itself, so remember where it starts.
        auto Self = ActionPtr;
        ActionNext = readSLEB128(ActionPtr);
        if (opts::PrintExceptions)
          errs() << Sep << "(" << ActionType << ", " << ActionNext << ") ";
        if (ActionType == 0) {
          if (opts::PrintExceptions)
            errs() << "cleanup";
        } else if (ActionType > 0) {
          // It's an index into a type table.
          if (opts::PrintExceptions) {
            errs() << "catch type ";
            printType(ActionType, errs());
          }
        } else { // ActionType < 0
          if (opts::PrintExceptions)
            errs() << "filter exception types ";
          auto TSep = "";
          // ActionType is a negative byte offset into uleb128-encoded table
          // of indices with base 1.
          // E.g. -1 means offset 0, -2 is offset 1, etc. The indices are
          // encoded using uleb128 so we cannot directly dereference them.
          auto TypeIndexTablePtr = TypeIndexTableStart - ActionType - 1;
          while (auto Index = readULEB128(TypeIndexTablePtr)) {
            if (opts::PrintExceptions) {
              errs() << TSep;
              printType(Index, errs());
              TSep = ", ";
            }
          }
          MaxTypeIndexTableOffset =
              std::max(MaxTypeIndexTableOffset,
                       TypeIndexTablePtr - TypeIndexTableStart);
        }

        Sep = "; ";

        ActionPtr = Self + ActionNext;
      } while (ActionNext);
      if (opts::PrintExceptions)
        errs() << '\n';
    }
  }
  if (opts::PrintExceptions)
    errs() << '\n';
}
const uint8_t DWARF_CFI_PRIMARY_OPCODE_MASK = 0xc0;
const uint8_t DWARF_CFI_PRIMARY_OPERAND_MASK = 0x3f;
@ -270,6 +447,8 @@ void CFIReader::fillCFIInfoFor(BinaryFunction &Function) const {
Function.getSize(), CurFDE.getAddressRange());
}
Function.setLSDAAddress(CurFDE.getLSDAAddress());
uint64_t Offset = 0;
uint64_t CodeAlignment = CurFDE.getLinkedCIE()->getCodeAlignmentFactor();
uint64_t DataAlignment = CurFDE.getLinkedCIE()->getDataAlignmentFactor();

View File

@ -457,6 +457,9 @@ static void OptimizeFile(ELFObjectFileBase *File, const DataReader &DR) {
);
}
ArrayRef<uint8_t> LSDAData;
uint64_t LSDAAddress{0};
// Process special sections.
for (const auto &Section : File->sections()) {
StringRef SectionName;
@ -470,6 +473,8 @@ static void OptimizeFile(ELFObjectFileBase *File, const DataReader &DR) {
if (SectionName == ".gcc_except_table") {
readLSDA(SectionData, *BC);
LSDAData = SectionData;
LSDAAddress = Section.getAddress();
}
}
@ -546,6 +551,11 @@ static void OptimizeFile(ELFObjectFileBase *File, const DataReader &DR) {
if (EHFrame.ParseError.empty())
DwCFIReader.fillCFIInfoFor(Function);
// Parse LSDA.
if (Function.getLSDAAddress() != 0) {
Function.parseLSDA(LSDAData, LSDAAddress);
}
if (opts::PrintAll || opts::PrintDisasm)
Function.print(errs(), "after disassembly");