[lldb/DWARF] Fix PC value for artificial tail call frames for the "GNU" case

Summary:
The way that the support for the GNU dialect of tail call frames was
implemented in D80519 meant that the were reporting very bogus PC values
which pointed into the middle of an instruction: the -1 trick is
necessary for the address to resolve to the right function, but we
should still be reporting a more realistic PC value -- I say "realistic"
and not "real", because it's very debatable what should be the correct
PC value for frames like this.

This patch achieves that my moving the -1 from SymbolFileDWARF into the
stack frame computation code. The idea is that SymbolFileDWARF will
merely report whether it has provided an address of the instruction
after the tail call, or the address of the call instruction itself. The
StackFrameList machinery uses this information to set the "behaves like
frame zero" property of the artificial frames (the main thing this flag
does is it controls the -1 subtraction when looking up the function
address).

This required a moderate refactor of the CallEdge class, because it was
implicitly assuming that edges pointing after the call were real calls
and those pointing the the call insn were tail calls. The class now
carries this information explicitly -- it carries three mostly
independent pieces of information:
- an address of interest in the caller
- a bit saying whether this address points to the call insn or after it
- whether this is a tail call

Reviewers: vsk, dblaikie

Subscribers: aprantl, mgrang, lldb-commits

Tags: #lldb

Differential Revision: https://reviews.llvm.org/D81010
This commit is contained in:
Pavel Labath 2020-06-02 16:01:11 +02:00
parent 603b974cf7
commit 0081149f96
5 changed files with 101 additions and 68 deletions

View File

@ -266,6 +266,7 @@ using CallSiteParameterArray = llvm::SmallVector<CallSiteParameter, 0>;
/// in the call graph between two functions, or to evaluate DW_OP_entry_value. /// in the call graph between two functions, or to evaluate DW_OP_entry_value.
class CallEdge { class CallEdge {
public: public:
enum class AddrType : uint8_t { Call, AfterCall };
virtual ~CallEdge() {} virtual ~CallEdge() {}
/// Get the callee's definition. /// Get the callee's definition.
@ -281,21 +282,32 @@ public:
/// made the call. /// made the call.
lldb::addr_t GetReturnPCAddress(Function &caller, Target &target) const; lldb::addr_t GetReturnPCAddress(Function &caller, Target &target) const;
/// Like \ref GetReturnPCAddress, but returns an unresolved file address. /// Return an address in the caller. This can either be the address of the
lldb::addr_t GetUnresolvedReturnPCAddress() const { return return_pc; } /// call instruction, or the address of the instruction after the call.
std::pair<AddrType, lldb::addr_t> GetCallerAddress(Function &caller,
Target &target) const {
return {caller_address_type,
GetLoadAddress(caller_address, caller, target)};
}
/// Get the load PC address of the call instruction (or LLDB_INVALID_ADDRESS). bool IsTailCall() const { return is_tail_call; }
lldb::addr_t GetCallInstPC(Function &caller, Target &target) const;
/// Get the call site parameters available at this call edge. /// Get the call site parameters available at this call edge.
llvm::ArrayRef<CallSiteParameter> GetCallSiteParameters() const { llvm::ArrayRef<CallSiteParameter> GetCallSiteParameters() const {
return parameters; return parameters;
} }
/// Non-tail-calls go first, sorted by the return address. They are followed
/// by tail calls, which have no specific order.
std::pair<bool, lldb::addr_t> GetSortKey() const {
return {is_tail_call, GetUnresolvedReturnPCAddress()};
}
protected: protected:
CallEdge(lldb::addr_t return_pc, lldb::addr_t call_inst_pc, CallEdge(AddrType caller_address_type, lldb::addr_t caller_address,
CallSiteParameterArray &&parameters) bool is_tail_call, CallSiteParameterArray &&parameters)
: return_pc(return_pc), call_inst_pc(call_inst_pc), : caller_address(caller_address),
caller_address_type(caller_address_type), is_tail_call(is_tail_call),
parameters(std::move(parameters)) {} parameters(std::move(parameters)) {}
/// Helper that finds the load address of \p unresolved_pc, a file address /// Helper that finds the load address of \p unresolved_pc, a file address
@ -303,13 +315,17 @@ protected:
static lldb::addr_t GetLoadAddress(lldb::addr_t unresolved_pc, static lldb::addr_t GetLoadAddress(lldb::addr_t unresolved_pc,
Function &caller, Target &target); Function &caller, Target &target);
/// An invalid address if this is a tail call. Otherwise, the return PC for /// Like \ref GetReturnPCAddress, but returns an unresolved file address.
/// the call. Note that this is a file address which must be resolved. lldb::addr_t GetUnresolvedReturnPCAddress() const {
lldb::addr_t return_pc; return caller_address_type == AddrType::AfterCall && !is_tail_call
? caller_address
: LLDB_INVALID_ADDRESS;
}
/// The address of the call instruction. Usually an invalid address, unless private:
/// this is a tail call. lldb::addr_t caller_address;
lldb::addr_t call_inst_pc; AddrType caller_address_type;
bool is_tail_call;
CallSiteParameterArray parameters; CallSiteParameterArray parameters;
}; };
@ -321,9 +337,11 @@ class DirectCallEdge : public CallEdge {
public: public:
/// Construct a call edge using a symbol name to identify the callee, and a /// Construct a call edge using a symbol name to identify the callee, and a
/// return PC within the calling function to identify a specific call site. /// return PC within the calling function to identify a specific call site.
DirectCallEdge(const char *symbol_name, lldb::addr_t return_pc, DirectCallEdge(const char *symbol_name, AddrType caller_address_type,
lldb::addr_t call_inst_pc, CallSiteParameterArray &&parameters) lldb::addr_t caller_address, bool is_tail_call,
: CallEdge(return_pc, call_inst_pc, std::move(parameters)) { CallSiteParameterArray &&parameters)
: CallEdge(caller_address_type, caller_address, is_tail_call,
std::move(parameters)) {
lazy_callee.symbol_name = symbol_name; lazy_callee.symbol_name = symbol_name;
} }
@ -352,10 +370,11 @@ class IndirectCallEdge : public CallEdge {
public: public:
/// Construct a call edge using a DWARFExpression to identify the callee, and /// Construct a call edge using a DWARFExpression to identify the callee, and
/// a return PC within the calling function to identify a specific call site. /// a return PC within the calling function to identify a specific call site.
IndirectCallEdge(DWARFExpression call_target, lldb::addr_t return_pc, IndirectCallEdge(DWARFExpression call_target, AddrType caller_address_type,
lldb::addr_t call_inst_pc, lldb::addr_t caller_address, bool is_tail_call,
CallSiteParameterArray &&parameters) CallSiteParameterArray &&parameters)
: CallEdge(return_pc, call_inst_pc, std::move(parameters)), : CallEdge(caller_address_type, caller_address, is_tail_call,
std::move(parameters)),
call_target(std::move(call_target)) {} call_target(std::move(call_target)) {}
Function *GetCallee(ModuleList &images, ExecutionContext &exe_ctx) override; Function *GetCallee(ModuleList &images, ExecutionContext &exe_ctx) override;

View File

@ -3733,26 +3733,30 @@ SymbolFileDWARF::CollectCallEdges(ModuleSP module, DWARFDIE function_die) {
child.GetCU()); child.GetCU());
} }
} }
if (child.Tag() == DW_TAG_GNU_call_site) {
// In DWARF v4 DW_AT_low_pc is always the address of the instruction after
// the call. For tail calls, approximate the address of the call
// instruction by subtracting 1.
if (tail_call)
call_inst_pc = low_pc - 1;
else
return_pc = low_pc;
}
if (!call_origin && !call_target) { if (!call_origin && !call_target) {
LLDB_LOG(log, "CollectCallEdges: call site without any call target"); LLDB_LOG(log, "CollectCallEdges: call site without any call target");
continue; continue;
} }
addr_t caller_address;
CallEdge::AddrType caller_address_type;
if (return_pc != LLDB_INVALID_ADDRESS) {
caller_address = return_pc;
caller_address_type = CallEdge::AddrType::AfterCall;
} else if (low_pc != LLDB_INVALID_ADDRESS) {
caller_address = low_pc;
caller_address_type = CallEdge::AddrType::AfterCall;
} else if (call_inst_pc != LLDB_INVALID_ADDRESS) {
caller_address = call_inst_pc;
caller_address_type = CallEdge::AddrType::Call;
} else {
LLDB_LOG(log, "CollectCallEdges: No caller address");
continue;
}
// Adjust any PC forms. It needs to be fixed up if the main executable // Adjust any PC forms. It needs to be fixed up if the main executable
// contains a debug map (i.e. pointers to object files), because we need a // contains a debug map (i.e. pointers to object files), because we need a
// file address relative to the executable's text section. // file address relative to the executable's text section.
return_pc = FixupAddress(return_pc); caller_address = FixupAddress(caller_address);
call_inst_pc = FixupAddress(call_inst_pc);
// Extract call site parameters. // Extract call site parameters.
CallSiteParameterArray parameters = CallSiteParameterArray parameters =
@ -3764,9 +3768,9 @@ SymbolFileDWARF::CollectCallEdges(ModuleSP module, DWARFDIE function_die) {
"CollectCallEdges: Found call origin: {0} (retn-PC: {1:x}) " "CollectCallEdges: Found call origin: {0} (retn-PC: {1:x}) "
"(call-PC: {2:x})", "(call-PC: {2:x})",
call_origin->GetPubname(), return_pc, call_inst_pc); call_origin->GetPubname(), return_pc, call_inst_pc);
edge = std::make_unique<DirectCallEdge>(call_origin->GetMangledName(), edge = std::make_unique<DirectCallEdge>(
return_pc, call_inst_pc, call_origin->GetMangledName(), caller_address_type, caller_address,
std::move(parameters)); tail_call, std::move(parameters));
} else { } else {
if (log) { if (log) {
StreamString call_target_desc; StreamString call_target_desc;
@ -3776,7 +3780,8 @@ SymbolFileDWARF::CollectCallEdges(ModuleSP module, DWARFDIE function_die) {
call_target_desc.GetString()); call_target_desc.GetString());
} }
edge = std::make_unique<IndirectCallEdge>( edge = std::make_unique<IndirectCallEdge>(
*call_target, return_pc, call_inst_pc, std::move(parameters)); *call_target, caller_address_type, caller_address, tail_call,
std::move(parameters));
} }
if (log && parameters.size()) { if (log && parameters.size()) {

View File

@ -145,11 +145,7 @@ lldb::addr_t CallEdge::GetLoadAddress(lldb::addr_t unresolved_pc,
lldb::addr_t CallEdge::GetReturnPCAddress(Function &caller, lldb::addr_t CallEdge::GetReturnPCAddress(Function &caller,
Target &target) const { Target &target) const {
return GetLoadAddress(return_pc, caller, target); return GetLoadAddress(GetUnresolvedReturnPCAddress(), caller, target);
}
lldb::addr_t CallEdge::GetCallInstPC(Function &caller, Target &target) const {
return GetLoadAddress(call_inst_pc, caller, target);
} }
void DirectCallEdge::ParseSymbolFileAndResolve(ModuleList &images) { void DirectCallEdge::ParseSymbolFileAndResolve(ModuleList &images) {
@ -313,36 +309,34 @@ llvm::ArrayRef<std::unique_ptr<CallEdge>> Function::GetCallEdges() {
m_call_edges = sym_file->ParseCallEdgesInFunction(GetID()); m_call_edges = sym_file->ParseCallEdgesInFunction(GetID());
// Sort the call edges to speed up return_pc lookups. // Sort the call edges to speed up return_pc lookups.
llvm::sort(m_call_edges.begin(), m_call_edges.end(), llvm::sort(m_call_edges, [](const std::unique_ptr<CallEdge> &LHS,
[](const std::unique_ptr<CallEdge> &LHS, const std::unique_ptr<CallEdge> &RHS) {
const std::unique_ptr<CallEdge> &RHS) { return LHS->GetSortKey() < RHS->GetSortKey();
return LHS->GetUnresolvedReturnPCAddress() < });
RHS->GetUnresolvedReturnPCAddress();
});
return m_call_edges; return m_call_edges;
} }
llvm::ArrayRef<std::unique_ptr<CallEdge>> Function::GetTailCallingEdges() { llvm::ArrayRef<std::unique_ptr<CallEdge>> Function::GetTailCallingEdges() {
// Call edges are sorted by return PC, and tail calling edges have invalid // Tail calling edges are sorted at the end of the list. Find them by dropping
// return PCs. Find them at the end of the list. // all non-tail-calls.
return GetCallEdges().drop_until([](const std::unique_ptr<CallEdge> &edge) { return GetCallEdges().drop_until(
return edge->GetUnresolvedReturnPCAddress() == LLDB_INVALID_ADDRESS; [](const std::unique_ptr<CallEdge> &edge) { return edge->IsTailCall(); });
});
} }
CallEdge *Function::GetCallEdgeForReturnAddress(addr_t return_pc, CallEdge *Function::GetCallEdgeForReturnAddress(addr_t return_pc,
Target &target) { Target &target) {
auto edges = GetCallEdges(); auto edges = GetCallEdges();
auto edge_it = auto edge_it =
std::lower_bound(edges.begin(), edges.end(), return_pc, llvm::partition_point(edges, [&](const std::unique_ptr<CallEdge> &edge) {
[&](const std::unique_ptr<CallEdge> &edge, addr_t pc) { return std::make_pair(edge->IsTailCall(),
return edge->GetReturnPCAddress(*this, target) < pc; edge->GetReturnPCAddress(*this, target)) <
}); std::make_pair(false, return_pc);
});
if (edge_it == edges.end() || if (edge_it == edges.end() ||
edge_it->get()->GetReturnPCAddress(*this, target) != return_pc) edge_it->get()->GetReturnPCAddress(*this, target) != return_pc)
return nullptr; return nullptr;
return &const_cast<CallEdge &>(*edge_it->get()); return edge_it->get();
} }
Block &Function::GetBlock(bool can_create) { Block &Function::GetBlock(bool can_create) {

View File

@ -239,7 +239,12 @@ void StackFrameList::GetOnlyConcreteFramesUpTo(uint32_t end_idx,
/// A sequence of calls that comprise some portion of a backtrace. Each frame /// A sequence of calls that comprise some portion of a backtrace. Each frame
/// is represented as a pair of a callee (Function *) and an address within the /// is represented as a pair of a callee (Function *) and an address within the
/// callee. /// callee.
using CallSequence = std::vector<std::pair<Function *, addr_t>>; struct CallDescriptor {
Function *func;
CallEdge::AddrType address_type;
addr_t address;
};
using CallSequence = std::vector<CallDescriptor>;
/// Find the unique path through the call graph from \p begin (with return PC /// Find the unique path through the call graph from \p begin (with return PC
/// \p return_pc) to \p end. On success this path is stored into \p path, and /// \p return_pc) to \p end. On success this path is stored into \p path, and
@ -319,14 +324,14 @@ static void FindInterveningFrames(Function &begin, Function &end,
} }
// Search the calls made from this callee. // Search the calls made from this callee.
active_path.emplace_back(&callee, LLDB_INVALID_ADDRESS); active_path.push_back(CallDescriptor{&callee});
for (const auto &edge : callee.GetTailCallingEdges()) { for (const auto &edge : callee.GetTailCallingEdges()) {
Function *next_callee = edge->GetCallee(images, context); Function *next_callee = edge->GetCallee(images, context);
if (!next_callee) if (!next_callee)
continue; continue;
addr_t tail_call_pc = edge->GetCallInstPC(callee, target); std::tie(active_path.back().address_type, active_path.back().address) =
active_path.back().second = tail_call_pc; edge->GetCallerAddress(callee, target);
dfs(*edge, *next_callee); dfs(*edge, *next_callee);
if (ambiguous) if (ambiguous)
@ -400,16 +405,16 @@ void StackFrameList::SynthesizeTailCallFrames(StackFrame &next_frame) {
// Push synthetic tail call frames. // Push synthetic tail call frames.
for (auto calleeInfo : llvm::reverse(path)) { for (auto calleeInfo : llvm::reverse(path)) {
Function *callee = calleeInfo.first; Function *callee = calleeInfo.func;
uint32_t frame_idx = m_frames.size(); uint32_t frame_idx = m_frames.size();
uint32_t concrete_frame_idx = next_frame.GetConcreteFrameIndex(); uint32_t concrete_frame_idx = next_frame.GetConcreteFrameIndex();
addr_t cfa = LLDB_INVALID_ADDRESS; addr_t cfa = LLDB_INVALID_ADDRESS;
bool cfa_is_valid = false; bool cfa_is_valid = false;
addr_t pc = calleeInfo.second; addr_t pc = calleeInfo.address;
// We do not want to subtract 1 from this PC, as it's the actual address // If the callee address refers to the call instruction, we do not want to
// of the tail-calling branch instruction. This address is provided by the // subtract 1 from this value.
// compiler via DW_AT_call_pc. const bool behaves_like_zeroth_frame =
constexpr bool behaves_like_zeroth_frame = true; calleeInfo.address_type == CallEdge::AddrType::Call;
SymbolContext sc; SymbolContext sc;
callee->CalculateSymbolContext(&sc); callee->CalculateSymbolContext(&sc);
auto synth_frame = std::make_shared<StackFrame>( auto synth_frame = std::make_shared<StackFrame>(

View File

@ -3,12 +3,22 @@ volatile int x;
void __attribute__((noinline)) sink() { void __attribute__((noinline)) sink() {
x++; //% self.filecheck("bt", "main.cpp", "-implicit-check-not=artificial") x++; //% self.filecheck("bt", "main.cpp", "-implicit-check-not=artificial")
// CHECK: frame #0: 0x{{[0-9a-f]+}} a.out`sink() at main.cpp:[[@LINE-1]]:4 // CHECK: frame #0: 0x{{[0-9a-f]+}} a.out`sink() at main.cpp:[[@LINE-1]]:4
// CHECK-NEXT: frame #1: 0x{{[0-9a-f]+}} a.out`func3() at main.cpp:16:3 // CHECK-NEXT: frame #1: 0x{{[0-9a-f]+}} a.out`func3() at main.cpp:26:3
// CHECK-SAME: [artificial] // CHECK-SAME: [artificial]
// CHECK-NEXT: frame #2: 0x{{[0-9a-f]+}} a.out`func2() // CHECK-NEXT: frame #2: 0x{{[0-9a-f]+}} a.out`func2()
// CHECK-NEXT: frame #3: 0x{{[0-9a-f]+}} a.out`func1() at main.cpp:25:3 // CHECK-NEXT: frame #3: 0x{{[0-9a-f]+}} a.out`func1() at main.cpp:35:3
// CHECK-SAME: [artificial] // CHECK-SAME: [artificial]
// CHECK-NEXT: frame #4: 0x{{[0-9a-f]+}} a.out`main // CHECK-NEXT: frame #4: 0x{{[0-9a-f]+}} a.out`main
// In the GNU style, the artificial frames will point after the tail call
// instruction. In v5 they should point to the instruction itself.
//% frame1 = self.thread().GetFrameAtIndex(1)
//% func3 = frame1.GetFunction()
//% func3_insns = func3.GetInstructions(self.target())
//% self.trace("func3:\n%s"%func3_insns)
//% last_insn = func3_insns.GetInstructionAtIndex(func3_insns.GetSize()-1)
//% addr = last_insn.GetAddress()
//% if "GNU" in self.name: addr.OffsetAddress(last_insn.GetByteSize())
//% self.assertEquals(frame1.GetPCAddress(), addr)
} }
void __attribute__((noinline)) func3() { void __attribute__((noinline)) func3() {