Add more detailed symbol type categorization, based on a swift patch by

Greg Clayton a few years ago.

My patch to augment the symbol table in Mach-O files with the
dyld trie exports data structure only categorized symbols as code
or data. A few years ago Greg Clayton wrote a patch for swift that
did something similar but had a more extensive categorization of
symbols, and also extracted some objc class/ivar names from the
entries. This patch is basically just Greg's, updated a bit and
with a test case added to it.

<rdar://problem/50791451>

Differential Revision: https://reviews.llvm.org/D77369
This commit is contained in:
Jason Molenda 2020-04-02 22:10:00 -07:00
parent 72e8754916
commit 836534f997
5 changed files with 245 additions and 46 deletions

View File

@ -2036,6 +2036,66 @@ static bool ParseTrieEntries(DataExtractor &data, lldb::offset_t offset,
return true;
}
/// Classify a symbol according to the section that contains it.
///
/// \param[in,out] symbol_name
///     The symbol's name; may be null. When the symbol lives in a data
///     section whose name starts with "__objc" and the name starts with
///     "OBJC_CLASS_$_", "OBJC_METACLASS_$_" or "OBJC_IVAR_$_", the pointer is
///     advanced past that prefix.
///
/// \param[out] demangled_is_synthesized
///     Set to true only when a prefix was stripped from \a symbol_name;
///     otherwise it is left untouched.
///
/// \param[in] text_section_sp
///     Section tested with IsDescendant() to recognize code/text symbols.
///
/// \param[in] data_section_sp
/// \param[in] data_dirty_section_sp
/// \param[in] data_const_section_sp
///     Sections tested with IsDescendant() to recognize data symbols.
///
/// \param[in] symbol_section
///     The section containing the symbol. An empty shared pointer is
///     tolerated and yields eSymbolTypeInvalid.
///
/// \return
///     The symbol type, or eSymbolTypeInvalid when the section matches none
///     of the known categories.
static SymbolType GetSymbolType(const char *&symbol_name,
                                bool &demangled_is_synthesized,
                                const SectionSP &text_section_sp,
                                const SectionSP &data_section_sp,
                                const SectionSP &data_dirty_section_sp,
                                const SectionSP &data_const_section_sp,
                                const SectionSP &symbol_section) {
  // The trie-entry caller only tests its section *after* calling this
  // function, so an empty SectionSP must not be dereferenced here.
  if (!symbol_section)
    return eSymbolTypeInvalid;

  // A null section name is treated as an empty string so that every prefix
  // test below simply fails.
  const char *sect_cstr = symbol_section->GetName().AsCString();
  const llvm::StringRef sect_name(sect_cstr ? sect_cstr : "");

  if (symbol_section->IsDescendant(text_section_sp.get())) {
    // A section with none of the instruction attributes set holds data even
    // though it sits under the text section.
    const bool has_no_instructions = symbol_section->IsClear(
        S_ATTR_PURE_INSTRUCTIONS | S_ATTR_SELF_MODIFYING_CODE |
        S_ATTR_SOME_INSTRUCTIONS);
    return has_no_instructions ? eSymbolTypeData : eSymbolTypeCode;
  }

  const bool in_data_section =
      symbol_section->IsDescendant(data_section_sp.get()) ||
      symbol_section->IsDescendant(data_dirty_section_sp.get()) ||
      symbol_section->IsDescendant(data_const_section_sp.get());

  if (!in_data_section)
    return sect_name.startswith("__IMPORT") ? eSymbolTypeTrampoline
                                            : eSymbolTypeInvalid;

  if (!sect_name.startswith("__objc"))
    return sect_name.startswith("__gcc_except_tab") ? eSymbolTypeException
                                                    : eSymbolTypeData;

  // Anything in an "__objc*" section is runtime data unless its name carries
  // one of the prefixes below, in which case the prefix is stripped and the
  // more specific type is reported.
  if (symbol_name) {
    static const struct {
      llvm::StringRef prefix;
      SymbolType type;
    } g_objc_v2_prefixes[] = {
        {"OBJC_CLASS_$_", eSymbolTypeObjCClass},
        {"OBJC_METACLASS_$_", eSymbolTypeObjCMetaClass},
        {"OBJC_IVAR_$_", eSymbolTypeObjCIVar},
    };

    const llvm::StringRef symbol_name_ref(symbol_name);
    for (const auto &entry : g_objc_v2_prefixes) {
      if (symbol_name_ref.startswith(entry.prefix)) {
        symbol_name += entry.prefix.size();
        demangled_is_synthesized = true;
        return entry.type;
      }
    }
  }
  return eSymbolTypeRuntime;
}
// Read the UUID out of a dyld_shared_cache file on-disk.
UUID ObjectFileMachO::GetSharedCacheUUID(FileSpec dyld_shared_cache,
const ByteOrder byte_order,
@ -4536,22 +4596,20 @@ size_t ObjectFileMachO::ParseSymtab() {
Address symbol_addr;
if (module_sp->ResolveFileAddress(e.entry.address, symbol_addr)) {
SectionSP symbol_section(symbol_addr.GetSection());
const char *symbol_name = e.entry.name.GetCString();
bool demangled_is_synthesized = false;
SymbolType type =
GetSymbolType(symbol_name, demangled_is_synthesized, text_section_sp,
data_section_sp, data_dirty_section_sp,
data_const_section_sp, symbol_section);
sym[sym_idx].SetType(type);
if (symbol_section) {
sym[sym_idx].SetID(synthetic_sym_id++);
sym[sym_idx].GetMangled().SetMangledName(e.entry.name);
switch (symbol_section->GetType()) {
case eSectionTypeCode:
sym[sym_idx].SetType(eSymbolTypeCode);
break;
case eSectionTypeOther:
case eSectionTypeData:
case eSectionTypeZeroFill:
sym[sym_idx].SetType(eSymbolTypeData);
break;
default:
break;
}
sym[sym_idx].SetIsSynthetic(false);
sym[sym_idx].GetMangled().SetMangledName(ConstString(symbol_name));
if (demangled_is_synthesized)
sym[sym_idx].SetDemangledNameIsSynthesized(true);
sym[sym_idx].SetIsSynthetic(true);
sym[sym_idx].SetExternal(true);
sym[sym_idx].GetAddressRef() = symbol_addr;
symbols_added.insert(symbol_addr.GetFileAddress());

View File

@ -1,7 +1,7 @@
CXX_SOURCES := main.cpp
OBJCXX_SOURCES := main.mm
EXE := a.out
MAKE_DSYM := NO
LD_EXTRAS = -dynamiclib -image_base 0x8000
LD_EXTRAS = -dynamiclib -image_base 0x8000 -framework Foundation
CFLAGS = $(CFLAGS_NO_DEBUG)
include Makefile.rules
@ -10,4 +10,4 @@ all: a.out a.out-stripped
a.out-stripped:
cp a.out a.out-stripped
strip a.out-stripped
strip -N a.out-stripped

View File

@ -85,3 +85,24 @@ class DyldTrieSymbolsTestCase(TestBase):
stripped_bar_symbols = stripped_target.FindSymbols("bar")
self.assertEqual(stripped_bar_symbols.GetSize(), 0)
# confirm that we classified objc runtime symbols correctly and
# stripped off the objc prefix from the symbol names.
syms_ctx = stripped_target.FindSymbols("SourceBase")
self.assertEqual(syms_ctx.GetSize(), 2)
sym1 = syms_ctx.GetContextAtIndex(0).GetSymbol()
sym2 = syms_ctx.GetContextAtIndex(1).GetSymbol()
# one of these should be a lldb.eSymbolTypeObjCClass, the other
# should be lldb.eSymbolTypeObjCMetaClass.
if sym1.GetType() == lldb.eSymbolTypeObjCMetaClass:
self.assertEqual(sym2.GetType(), lldb.eSymbolTypeObjCClass)
else:
if sym1.GetType() == lldb.eSymbolTypeObjCClass:
self.assertEqual(sym2.GetType(), lldb.eSymbolTypeObjCMetaClass)
else:
self.assertTrue(sym1.GetType() == lldb.eSymbolTypeObjCMetaClass or sym1.GetType() == lldb.eSymbolTypeObjCClass)
syms_ctx = stripped_target.FindSymbols("SourceDerived._derivedValue")
self.assertEqual(syms_ctx.GetSize(), 1)
sym = syms_ctx.GetContextAtIndex(0).GetSymbol()
self.assertEqual(sym.GetType(), lldb.eSymbolTypeObjCIVar)

View File

@ -1,29 +0,0 @@
int patval; // external symbol, will not be completely stripped

// Returns patval, first storing `in` into it if it is still zero.
int pat(int in) { // external symbol, will not be completely stripped
  if (patval != 0)
    return patval;
  patval = in;
  return patval;
}
static int fooval; // static symbol, stripped

// Returns fooval, first setting it to 5 if it is still zero.
int foo() { // external symbol, will not be completely stripped
  if (fooval != 0)
    return fooval;
  fooval = 5;
  return fooval;
}
int bazval = 10; // external symbol, will not be completely stripped

// Returns the result of foo() plus the current value of bazval.
int baz () { // external symbol, will not be completely stripped
  const int from_foo = foo();
  return from_foo + bazval;
}
static int barval = 15; // static symbol, stripped

// Returns the result of baz() plus the current value of barval.
static int bar () { // static symbol, stripped; __lldb_unnamed_symbol from func starts
  const int from_baz = baz();
  return from_baz + barval;
}
// Exported entry point that forwards to the file-local bar().
int calculate () // external symbol, will not be completely stripped
{
  const int result = bar();
  return result;
}

View File

@ -0,0 +1,149 @@
#import <Foundation/Foundation.h>
// SourceBase is the base class of Source. It holds a single 32-bit value
// that -init sets to 10 and -getValue returns.
@interface SourceBase: NSObject
{
    // Declared in the @interface block; the accompanying test looks up ObjC
    // class and ivar symbols by name, so the declarations are left where
    // they are.
    uint32_t _value;
}
- (SourceBase *) init;
- (uint32_t) getValue;
@end

@implementation SourceBase
- (SourceBase *) init
{
    // Adopt whatever object the superclass initializer hands back (it may be
    // nil or a different instance) before touching any ivar.
    self = [super init];
    if (self) {
        _value = 10;
    }
    return self;
}

- (uint32_t) getValue
{
    return _value;
}
@end
// Source is a class that will be observed by the Observer class below.
// When Observer sets itself up to observe this property (in initWithASource)
// the KVO system will overwrite the "isa" pointer of the object with the "kvo'ed"
// one.
@interface Source : SourceBase
{
    int _property;
}
- (Source *) init;
- (void) setProperty: (int) newValue;
@end

@implementation Source
- (Source *) init
{
    // Use the object returned by the superclass initializer; only write the
    // ivar when initialization actually succeeded.
    self = [super init];
    if (self) {
        _property = 20;
    }
    return self;
}

- (void) setProperty: (int) newValue
{
    _property = newValue; // This is the line in setProperty, make sure we step to here.
}
@end
// SourceDerived extends Source with one more ivar and overrides -getValue to
// return it. The accompanying test looks up "SourceDerived._derivedValue" as
// an ObjC ivar symbol, so the ivar name and its place of declaration must
// stay as they are.
@interface SourceDerived : Source
{
    int _derivedValue;
}
- (SourceDerived *) init;
- (uint32_t) getValue;
@end

@implementation SourceDerived
- (SourceDerived *) init
{
    // Use the object returned by the superclass initializer; only write the
    // ivar when initialization actually succeeded.
    self = [super init];
    if (self) {
        _derivedValue = 30;
    }
    return self;
}

// Overrides the inherited -getValue and returns _derivedValue instead.
- (uint32_t) getValue
{
    return _derivedValue;
}
@end
// Observer is the object that will watch Source and cause KVO to swizzle it...
@interface Observer : NSObject
{
    Source *_source;
}
+ (Observer *) observerWithSource: (Source *) source;
- (Observer *) initWithASource: (Source *) source;
- (void) observeValueForKeyPath: (NSString *) path
                       ofObject: (id) object
                         change: (NSDictionary *) change
                        context: (void *) context;
@end

@implementation Observer

// Convenience factory: allocates an Observer and initializes it with inSource.
+ (Observer *) observerWithSource: (Source *) inSource
{
    Observer *retval;

    retval = [[Observer alloc] initWithASource: inSource];
    return retval;
}

// Stores `source` and registers the receiver as a key-value observer of its
// "property" key path, asking for both the new and the old value.
// NOTE(review): the observer is never removed in the code visible here;
// confirm that is acceptable for this test fixture.
- (Observer *) initWithASource: (Source *) source
{
    // Use the object returned by the superclass initializer; skip the setup
    // entirely if initialization failed.
    self = [super init];
    if (self) {
        _source = source;
        [_source addObserver: self
                  forKeyPath: @"property"
                     options: (NSKeyValueObservingOptionNew | NSKeyValueObservingOptionOld)
                     context: NULL];
    }
    return self;
}

// KVO callback: only reports that it was invoked; the arguments are ignored.
- (void) observeValueForKeyPath: (NSString *) path
                       ofObject: (id) object
                         change: (NSDictionary *) change
                        context: (void *) context
{
    printf ("Observer function called.\n");
    return;
}
@end
int patval; // external symbol, will not be completely stripped

// Returns patval, first storing `in` into it if it is still zero.
int pat(int in) { // external symbol, will not be completely stripped
  if (patval != 0)
    return patval;
  patval = in;
  return patval;
}
static int fooval; // static symbol, stripped

// Returns fooval, first setting it to 5 if it is still zero.
int foo() { // external symbol, will not be completely stripped
  if (fooval != 0)
    return fooval;
  fooval = 5;
  return fooval;
}
int bazval = 10; // external symbol, will not be completely stripped

// Returns the result of foo() plus the current value of bazval.
int baz () { // external symbol, will not be completely stripped
  const int from_foo = foo();
  return from_foo + bazval;
}
static int barval = 15; // static symbol, stripped

// Returns the result of baz() plus the current value of barval.
static int bar () { // static symbol, stripped; __lldb_unnamed_symbol from func starts
  const int from_baz = baz();
  return from_baz + barval;
}
// Exported entry point that forwards to the file-local bar().
int calculate () // external symbol, will not be completely stripped
{
  const int result = bar();
  return result;
}