mirror of https://github.com/ByConity/ByConity
Advancements
This commit is contained in:
parent
daa36650fb
commit
15dc6d1818
|
@ -52,7 +52,7 @@ class Elf;
|
||||||
* kept as a vector of strings instead of re-executing the program to look for
|
* kept as a vector of strings instead of re-executing the program to look for
|
||||||
* DW_LNE_define_file instructions, etc.
|
* DW_LNE_define_file instructions, etc.
|
||||||
*/
|
*/
|
||||||
class Dwarf
|
class Dwarf final
|
||||||
{
|
{
|
||||||
// Note that Dwarf uses (and returns) std::string_view a lot.
|
// Note that Dwarf uses (and returns) std::string_view a lot.
|
||||||
// The std::string_view point within sections in the ELF file, and so will
|
// The std::string_view point within sections in the ELF file, and so will
|
||||||
|
|
|
@ -17,8 +17,8 @@ Elf::Elf(const std::string & path)
|
||||||
: in(path, 0)
|
: in(path, 0)
|
||||||
{
|
{
|
||||||
/// Check if it's an elf.
|
/// Check if it's an elf.
|
||||||
size = in.buffer().size();
|
elf_size = in.buffer().size();
|
||||||
if (size < sizeof(ElfEhdr))
|
if (elf_size < sizeof(ElfEhdr))
|
||||||
throw Exception("The size of supposedly ELF file is too small", ErrorCodes::CANNOT_PARSE_ELF);
|
throw Exception("The size of supposedly ELF file is too small", ErrorCodes::CANNOT_PARSE_ELF);
|
||||||
|
|
||||||
mapped = in.buffer().begin();
|
mapped = in.buffer().begin();
|
||||||
|
@ -33,7 +33,7 @@ Elf::Elf(const std::string & path)
|
||||||
|
|
||||||
if (!section_header_offset
|
if (!section_header_offset
|
||||||
|| !section_header_num_entries
|
|| !section_header_num_entries
|
||||||
|| section_header_offset + section_header_num_entries * sizeof(ElfShdr) > size)
|
|| section_header_offset + section_header_num_entries * sizeof(ElfShdr) > elf_size)
|
||||||
throw Exception("The ELF is truncated (section header points after end of file)", ErrorCodes::CANNOT_PARSE_ELF);
|
throw Exception("The ELF is truncated (section header points after end of file)", ErrorCodes::CANNOT_PARSE_ELF);
|
||||||
|
|
||||||
section_headers = reinterpret_cast<const ElfShdr *>(mapped + section_header_offset);
|
section_headers = reinterpret_cast<const ElfShdr *>(mapped + section_header_offset);
|
||||||
|
@ -48,7 +48,7 @@ Elf::Elf(const std::string & path)
|
||||||
throw Exception("The ELF doesn't have string table with section names", ErrorCodes::CANNOT_PARSE_ELF);
|
throw Exception("The ELF doesn't have string table with section names", ErrorCodes::CANNOT_PARSE_ELF);
|
||||||
|
|
||||||
ElfOff section_names_offset = section_names_strtab->header.sh_offset;
|
ElfOff section_names_offset = section_names_strtab->header.sh_offset;
|
||||||
if (section_names_offset >= size)
|
if (section_names_offset >= elf_size)
|
||||||
throw Exception("The ELF is truncated (section names string table points after end of file)", ErrorCodes::CANNOT_PARSE_ELF);
|
throw Exception("The ELF is truncated (section names string table points after end of file)", ErrorCodes::CANNOT_PARSE_ELF);
|
||||||
|
|
||||||
section_names = reinterpret_cast<const char *>(mapped + section_names_offset);
|
section_names = reinterpret_cast<const char *>(mapped + section_names_offset);
|
||||||
|
@ -68,7 +68,7 @@ bool Elf::iterateSections(std::function<bool(const Section & section, size_t idx
|
||||||
Section section(section_headers[idx], *this);
|
Section section(section_headers[idx], *this);
|
||||||
|
|
||||||
/// Sections spans after end of file.
|
/// Sections spans after end of file.
|
||||||
if (section.header.sh_offset + section.header.sh_size > size)
|
if (section.header.sh_offset + section.header.sh_size > elf_size)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
if (pred(section, idx))
|
if (pred(section, idx))
|
||||||
|
|
|
@ -41,17 +41,19 @@ public:
|
||||||
const Elf & elf;
|
const Elf & elf;
|
||||||
};
|
};
|
||||||
|
|
||||||
Elf(const std::string & path);
|
explicit Elf(const std::string & path);
|
||||||
|
|
||||||
bool iterateSections(std::function<bool(const Section & section, size_t idx)> && pred) const;
|
bool iterateSections(std::function<bool(const Section & section, size_t idx)> && pred) const;
|
||||||
std::optional<Section> findSection(std::function<bool(const Section & section, size_t idx)> && pred) const;
|
std::optional<Section> findSection(std::function<bool(const Section & section, size_t idx)> && pred) const;
|
||||||
std::optional<Section> findSectionByName(const char * name) const;
|
std::optional<Section> findSectionByName(const char * name) const;
|
||||||
|
|
||||||
const char * end() const { return mapped + size; }
|
const char * begin() const { return mapped; }
|
||||||
|
const char * end() const { return mapped + elf_size; }
|
||||||
|
size_t size() const { return elf_size; }
|
||||||
|
|
||||||
private:
|
private:
|
||||||
MMapReadBufferFromFile in;
|
MMapReadBufferFromFile in;
|
||||||
size_t size;
|
size_t elf_size;
|
||||||
const char * mapped;
|
const char * mapped;
|
||||||
const ElfEhdr * header;
|
const ElfEhdr * header;
|
||||||
const ElfShdr * section_headers;
|
const ElfShdr * section_headers;
|
||||||
|
|
|
@ -24,7 +24,8 @@ namespace
|
||||||
|
|
||||||
/// Based on the code of musl-libc and the answer of Kanalpiroge on
|
/// Based on the code of musl-libc and the answer of Kanalpiroge on
|
||||||
/// https://stackoverflow.com/questions/15779185/list-all-the-functions-symbols-on-the-fly-in-c-code-on-a-linux-architecture
|
/// https://stackoverflow.com/questions/15779185/list-all-the-functions-symbols-on-the-fly-in-c-code-on-a-linux-architecture
|
||||||
void collectSymbolsFromProgramHeaders(dl_phdr_info * info, std::vector<DB::SymbolIndex::Symbol> & symbols)
|
void collectSymbolsFromProgramHeaders(dl_phdr_info * info,
|
||||||
|
std::vector<DB::SymbolIndex::Symbol> & symbols)
|
||||||
{
|
{
|
||||||
/* Iterate over all headers of the current shared lib
|
/* Iterate over all headers of the current shared lib
|
||||||
* (first call is for the executable itself) */
|
* (first call is for the executable itself) */
|
||||||
|
@ -40,8 +41,6 @@ void collectSymbolsFromProgramHeaders(dl_phdr_info * info, std::vector<DB::Symbo
|
||||||
*/
|
*/
|
||||||
const ElfW(Dyn) * dyn_begin = reinterpret_cast<const ElfW(Dyn) *>(info->dlpi_addr + info->dlpi_phdr[header_index].p_vaddr);
|
const ElfW(Dyn) * dyn_begin = reinterpret_cast<const ElfW(Dyn) *>(info->dlpi_addr + info->dlpi_phdr[header_index].p_vaddr);
|
||||||
|
|
||||||
// std::cerr << "dlpi_addr: " << info->dlpi_addr << "\n";
|
|
||||||
|
|
||||||
/// For unknown reason, addresses are sometimes relative sometimes absolute.
|
/// For unknown reason, addresses are sometimes relative sometimes absolute.
|
||||||
auto correct_address = [](ElfW(Addr) base, ElfW(Addr) ptr)
|
auto correct_address = [](ElfW(Addr) base, ElfW(Addr) ptr)
|
||||||
{
|
{
|
||||||
|
@ -53,25 +52,17 @@ void collectSymbolsFromProgramHeaders(dl_phdr_info * info, std::vector<DB::Symbo
|
||||||
* an entry with d_tag == DT_NULL.
|
* an entry with d_tag == DT_NULL.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/* for (auto it = dyn_begin; it->d_tag != DT_NULL; ++it)
|
|
||||||
std::cerr << it->d_tag << "\n";*/
|
|
||||||
|
|
||||||
size_t sym_cnt = 0;
|
size_t sym_cnt = 0;
|
||||||
for (auto it = dyn_begin; it->d_tag != DT_NULL; ++it)
|
for (auto it = dyn_begin; it->d_tag != DT_NULL; ++it)
|
||||||
{
|
{
|
||||||
if (it->d_tag == DT_HASH)
|
if (it->d_tag == DT_HASH)
|
||||||
{
|
{
|
||||||
const ElfW(Word) * hash = reinterpret_cast<const ElfW(Word) *>(correct_address(info->dlpi_addr, it->d_un.d_ptr));
|
const ElfW(Word) * hash = reinterpret_cast<const ElfW(Word) *>(correct_address(info->dlpi_addr, it->d_un.d_ptr));
|
||||||
|
|
||||||
// std::cerr << it->d_un.d_ptr << ", " << it->d_un.d_val << "\n";
|
|
||||||
|
|
||||||
sym_cnt = hash[1];
|
sym_cnt = hash[1];
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
else if (it->d_tag == DT_GNU_HASH)
|
else if (it->d_tag == DT_GNU_HASH)
|
||||||
{
|
{
|
||||||
// std::cerr << it->d_un.d_ptr << ", " << it->d_un.d_val << "\n";
|
|
||||||
|
|
||||||
/// This code based on Musl-libc.
|
/// This code based on Musl-libc.
|
||||||
|
|
||||||
const uint32_t * buckets = nullptr;
|
const uint32_t * buckets = nullptr;
|
||||||
|
@ -100,7 +91,6 @@ void collectSymbolsFromProgramHeaders(dl_phdr_info * info, std::vector<DB::Symbo
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// std::cerr << "sym_cnt: " << sym_cnt << "\n";
|
|
||||||
if (!sym_cnt)
|
if (!sym_cnt)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
|
@ -117,8 +107,6 @@ void collectSymbolsFromProgramHeaders(dl_phdr_info * info, std::vector<DB::Symbo
|
||||||
if (!strtab)
|
if (!strtab)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
// std::cerr << "Having strtab" << "\n";
|
|
||||||
|
|
||||||
for (auto it = dyn_begin; it->d_tag != DT_NULL; ++it)
|
for (auto it = dyn_begin; it->d_tag != DT_NULL; ++it)
|
||||||
{
|
{
|
||||||
if (it->d_tag == DT_SYMTAB)
|
if (it->d_tag == DT_SYMTAB)
|
||||||
|
@ -141,8 +129,6 @@ void collectSymbolsFromProgramHeaders(dl_phdr_info * info, std::vector<DB::Symbo
|
||||||
if (!sym_name)
|
if (!sym_name)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
// std::cerr << sym_name << "\n";
|
|
||||||
|
|
||||||
DB::SymbolIndex::Symbol symbol;
|
DB::SymbolIndex::Symbol symbol;
|
||||||
symbol.address_begin = reinterpret_cast<const void *>(info->dlpi_addr + elf_sym[sym_index].st_value);
|
symbol.address_begin = reinterpret_cast<const void *>(info->dlpi_addr + elf_sym[sym_index].st_value);
|
||||||
symbol.address_end = reinterpret_cast<const void *>(info->dlpi_addr + elf_sym[sym_index].st_value + elf_sym[sym_index].st_size);
|
symbol.address_end = reinterpret_cast<const void *>(info->dlpi_addr + elf_sym[sym_index].st_value + elf_sym[sym_index].st_size);
|
||||||
|
@ -226,7 +212,9 @@ bool searchAndCollectSymbolsFromELFSymbolTable(
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void collectSymbolsFromELF(dl_phdr_info * info, std::vector<DB::SymbolIndex::Symbol> & symbols)
|
void collectSymbolsFromELF(dl_phdr_info * info,
|
||||||
|
std::vector<DB::SymbolIndex::Symbol> & symbols,
|
||||||
|
std::vector<DB::SymbolIndex::Object> & objects)
|
||||||
{
|
{
|
||||||
std::string object_name = info->dlpi_name;
|
std::string object_name = info->dlpi_name;
|
||||||
|
|
||||||
|
@ -244,6 +232,12 @@ void collectSymbolsFromELF(dl_phdr_info * info, std::vector<DB::SymbolIndex::Sym
|
||||||
|
|
||||||
DB::Elf elf(object_name);
|
DB::Elf elf(object_name);
|
||||||
|
|
||||||
|
DB::SymbolIndex::Object object;
|
||||||
|
object.address_begin = reinterpret_cast<const void *>(info->dlpi_addr);
|
||||||
|
object.address_end = reinterpret_cast<const void *>(info->dlpi_addr + elf.size());
|
||||||
|
object.name = object_name;
|
||||||
|
objects.push_back(std::move(object));
|
||||||
|
|
||||||
searchAndCollectSymbolsFromELFSymbolTable(info, elf, SHT_SYMTAB, ".strtab", symbols);
|
searchAndCollectSymbolsFromELFSymbolTable(info, elf, SHT_SYMTAB, ".strtab", symbols);
|
||||||
searchAndCollectSymbolsFromELFSymbolTable(info, elf, SHT_DYNSYM, ".dynstr", symbols);
|
searchAndCollectSymbolsFromELFSymbolTable(info, elf, SHT_DYNSYM, ".dynstr", symbols);
|
||||||
}
|
}
|
||||||
|
@ -253,21 +247,41 @@ void collectSymbolsFromELF(dl_phdr_info * info, std::vector<DB::SymbolIndex::Sym
|
||||||
* Is called by dl_iterate_phdr for every loaded shared lib until something
|
* Is called by dl_iterate_phdr for every loaded shared lib until something
|
||||||
* else than 0 is returned by one call of this function.
|
* else than 0 is returned by one call of this function.
|
||||||
*/
|
*/
|
||||||
int collectSymbols(dl_phdr_info * info, size_t, void * data_ptr)
{
    /// dl_iterate_phdr passes its user pointer through untouched;
    /// here it is interpreted as the SymbolIndex::Data being filled in.
    auto * index_data = static_cast<DB::SymbolIndex::Data *>(data_ptr);

    /// Symbols reachable through the program headers of the loaded object.
    collectSymbolsFromProgramHeaders(info, index_data->symbols);

    /// Symbols from the object's ELF file; this call also receives the objects list.
    collectSymbolsFromELF(info, index_data->symbols, index_data->objects);

    /// Zero means "continue iterating".
    return 0;
}
|
||||||
|
|
||||||
|
|
||||||
|
/** Locate the element of `vec` whose address range contains `address`.
  *
  * T must expose `address_begin` and `address_end`; an element covers the
  * half-open range [address_begin, address_end).
  * `vec` is binary-searched by `address_begin`, so it has to be ordered by that field.
  *
  * Returns a pointer to the matching element, or nullptr if no element contains the address.
  */
template <typename T>
const T * find(const void * address, const std::vector<T> & vec)
{
    /// First range whose left boundary is strictly greater than the address.
    const auto next = std::upper_bound(vec.begin(), vec.end(), address,
        [](const void * addr, const T & range) { return addr < range.address_begin; });

    /// Every range starts after the address (this also covers an empty vector).
    if (next == vec.begin())
        return nullptr;

    /// Last range whose left boundary is less than or equal to the address.
    const T & candidate = *(next - 1);

    if (address >= candidate.address_begin && address < candidate.address_end)
        return &candidate;

    return nullptr;
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -276,28 +290,18 @@ namespace DB
|
||||||
|
|
||||||
void SymbolIndex::update()
|
void SymbolIndex::update()
|
||||||
{
|
{
|
||||||
dl_iterate_phdr(collectSymbols, &symbols);
|
dl_iterate_phdr(collectSymbols, &data.symbols);
|
||||||
std::sort(symbols.begin(), symbols.end());
|
std::sort(data.symbols.begin(), data.symbols.end(), [](const Symbol & a, const Symbol & b) { return a.address_begin < b.address_begin; });
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns the collected symbol whose [address_begin, address_end) range contains
/// the address, or nullptr when there is none.
const SymbolIndex::Symbol * SymbolIndex::findSymbol(const void * address) const
{
    const Symbol * const found = find(address, data.symbols);
    return found;
}
|
||||||
|
|
||||||
// std::cerr << "Searching " << address << "\n";
|
/// Returns the collected object whose [address_begin, address_end) range contains
/// the address, or nullptr when there is none.
/// The lookup is a binary search, so data.objects must be ordered by address_begin.
const SymbolIndex::Object * SymbolIndex::findObject(const void * address) const
{
    const Object * const found = find(address, data.objects);
    return found;
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -19,21 +19,31 @@ public:
|
||||||
const void * address_end;
|
const void * address_end;
|
||||||
const char * object;
|
const char * object;
|
||||||
std::string name; /// demangled NOTE Can use Arena for strings
|
std::string name; /// demangled NOTE Can use Arena for strings
|
||||||
|
};
|
||||||
|
|
||||||
bool operator< (const Symbol & rhs) const { return address_begin < rhs.address_begin; }
|
/// A loaded object (executable or shared library) and the address range attributed to it.
struct Object
{
    /// Range is half-open: [address_begin, address_end).
    /// Initialized so that a default-constructed Object never holds indeterminate pointers.
    const void * address_begin = nullptr;
    const void * address_end = nullptr;
    std::string name;
};
|
||||||
|
|
||||||
SymbolIndex() { update(); }
|
SymbolIndex() { update(); }
|
||||||
void update();
|
void update();
|
||||||
|
|
||||||
const Symbol * find(const void * address) const;
|
const Symbol * findSymbol(const void * address) const;
|
||||||
|
const Object * findObject(const void * address) const;
|
||||||
|
|
||||||
auto begin() const { return symbols.cbegin(); }
|
const std::vector<Symbol> & symbols() const { return data.symbols; }
|
||||||
auto end() const { return symbols.cend(); }
|
const std::vector<Object> & objects() const { return data.objects; }
|
||||||
|
|
||||||
private:
|
struct Data
|
||||||
|
{
|
||||||
std::vector<Symbol> symbols;
|
std::vector<Symbol> symbols;
|
||||||
|
std::vector<Object> objects;
|
||||||
|
};
|
||||||
|
private:
|
||||||
|
Data data;
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,4 +1,6 @@
|
||||||
#include <Common/SymbolIndex.h>
|
#include <Common/SymbolIndex.h>
|
||||||
|
#include <Common/Elf.h>
|
||||||
|
#include <Common/Dwarf.h>
|
||||||
#include <common/demangle.h>
|
#include <common/demangle.h>
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include <dlfcn.h>
|
#include <dlfcn.h>
|
||||||
|
@ -18,22 +20,31 @@ int main(int argc, char ** argv)
|
||||||
|
|
||||||
SymbolIndex symbol_index;
|
SymbolIndex symbol_index;
|
||||||
|
|
||||||
for (const auto & symbol : symbol_index)
|
for (const auto & elem : symbol_index.objects())
|
||||||
std::cout << symbol.name << ": " << symbol.address_begin << " ... " << symbol.address_end << "\n";
|
std::cout << elem.name << ": " << elem.address_begin << " ... " << elem.address_end << "\n";
|
||||||
|
|
||||||
const void * address = reinterpret_cast<void*>(std::stoull(argv[1], nullptr, 16));
|
const void * address = reinterpret_cast<void*>(std::stoull(argv[1], nullptr, 16));
|
||||||
|
|
||||||
auto symbol = symbol_index.find(address);
|
auto symbol = symbol_index.findSymbol(address);
|
||||||
if (symbol)
|
if (symbol)
|
||||||
std::cerr << symbol->name << ": " << symbol->address_begin << " ... " << symbol->address_end << "\n";
|
std::cerr << symbol->name << ": " << symbol->address_begin << " ... " << symbol->address_end << "\n";
|
||||||
else
|
else
|
||||||
std::cerr << "Not found\n";
|
std::cerr << "SymbolIndex: Not found\n";
|
||||||
|
|
||||||
Dl_info info;
|
Dl_info info;
|
||||||
if (dladdr(address, &info) && info.dli_sname)
|
if (dladdr(address, &info) && info.dli_sname)
|
||||||
std::cerr << demangle(info.dli_sname) << ": " << info.dli_saddr << "\n";
|
std::cerr << demangle(info.dli_sname) << ": " << info.dli_saddr << "\n";
|
||||||
else
|
else
|
||||||
std::cerr << "Not found\n";
|
std::cerr << "dladdr: Not found\n";
|
||||||
|
|
||||||
|
Elf elf("/proc/self/exe");
|
||||||
|
Dwarf dwarf(elf);
|
||||||
|
|
||||||
|
Dwarf::LocationInfo location;
|
||||||
|
if (dwarf.findAddress(uintptr_t(address), location, Dwarf::LocationInfoMode::FULL))
|
||||||
|
std::cerr << location.file.toString() << ":" << location.line << "\n";
|
||||||
|
else
|
||||||
|
std::cerr << "Dwarf: Not found\n";
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
|
@ -73,7 +73,7 @@ public:
|
||||||
|
|
||||||
for (size_t i = 0; i < input_rows_count; ++i)
|
for (size_t i = 0; i < input_rows_count; ++i)
|
||||||
{
|
{
|
||||||
if (const auto * symbol = symbol_index.find(reinterpret_cast<const void *>(data[i])))
|
if (const auto * symbol = symbol_index.findSymbol(reinterpret_cast<const void *>(data[i])))
|
||||||
result_column->insertDataWithTerminatingZero(symbol->name.data(), symbol->name.size() + 1);
|
result_column->insertDataWithTerminatingZero(symbol->name.data(), symbol->name.size() + 1);
|
||||||
else
|
else
|
||||||
result_column->insertDefault();
|
result_column->insertDefault();
|
||||||
|
|
Loading…
Reference in New Issue