diff --git a/dbms/src/Common/Dwarf.h b/dbms/src/Common/Dwarf.h index 48e2392225..5bc358df86 100644 --- a/dbms/src/Common/Dwarf.h +++ b/dbms/src/Common/Dwarf.h @@ -52,7 +52,7 @@ class Elf; * kept as a vector of strings instead of re-executing the program to look for * DW_LNE_define_file instructions, etc. */ -class Dwarf +class Dwarf final { // Note that Dwarf uses (and returns) std::string_view a lot. // The std::string_view point within sections in the ELF file, and so will @@ -126,8 +126,8 @@ public: }; /** - * Find the file and line number information corresponding to address. - */ + * Find the file and line number information corresponding to address. + */ bool findAddress(uintptr_t address, LocationInfo & info, LocationInfoMode mode) const; private: diff --git a/dbms/src/Common/Elf.cpp b/dbms/src/Common/Elf.cpp index 05767dfe60..bb51b837a1 100644 --- a/dbms/src/Common/Elf.cpp +++ b/dbms/src/Common/Elf.cpp @@ -17,8 +17,8 @@ Elf::Elf(const std::string & path) : in(path, 0) { /// Check if it's an elf. - size = in.buffer().size(); - if (size < sizeof(ElfEhdr)) + elf_size = in.buffer().size(); + if (elf_size < sizeof(ElfEhdr)) throw Exception("The size of supposedly ELF file is too small", ErrorCodes::CANNOT_PARSE_ELF); mapped = in.buffer().begin(); @@ -33,7 +33,7 @@ Elf::Elf(const std::string & path) if (!section_header_offset || !section_header_num_entries - || section_header_offset + section_header_num_entries * sizeof(ElfShdr) > size) + || section_header_offset + section_header_num_entries * sizeof(ElfShdr) > elf_size) throw Exception("The ELF is truncated (section header points after end of file)", ErrorCodes::CANNOT_PARSE_ELF); section_headers = reinterpret_cast(mapped + section_header_offset); @@ -48,7 +48,7 @@ Elf::Elf(const std::string & path) throw Exception("The ELF doesn't have string table with section names", ErrorCodes::CANNOT_PARSE_ELF); ElfOff section_names_offset = section_names_strtab->header.sh_offset; - if (section_names_offset >= size) + if (section_names_offset >= elf_size) throw Exception("The ELF is truncated (section names string table points after end of file)", ErrorCodes::CANNOT_PARSE_ELF); section_names = reinterpret_cast(mapped + section_names_offset); @@ -68,7 +68,7 @@ bool Elf::iterateSections(std::function size) + if (section.header.sh_offset + section.header.sh_size > elf_size) continue; if (pred(section, idx)) diff --git a/dbms/src/Common/Elf.h b/dbms/src/Common/Elf.h index f9f615dac3..7f7fcc538b 100644 --- a/dbms/src/Common/Elf.h +++ b/dbms/src/Common/Elf.h @@ -41,17 +41,19 @@ public: const Elf & elf; }; - Elf(const std::string & path); + explicit Elf(const std::string & path); bool iterateSections(std::function && pred) const; std::optional
findSection(std::function && pred) const; std::optional
findSectionByName(const char * name) const; - const char * end() const { return mapped + size; } + const char * begin() const { return mapped; } + const char * end() const { return mapped + elf_size; } + size_t size() const { return elf_size; } private: MMapReadBufferFromFile in; - size_t size; + size_t elf_size; const char * mapped; const ElfEhdr * header; const ElfShdr * section_headers; diff --git a/dbms/src/Common/SymbolIndex.cpp b/dbms/src/Common/SymbolIndex.cpp index 24fc93aec9..b315abead7 100644 --- a/dbms/src/Common/SymbolIndex.cpp +++ b/dbms/src/Common/SymbolIndex.cpp @@ -24,7 +24,8 @@ namespace /// Based on the code of musl-libc and the answer of Kanalpiroge on /// https://stackoverflow.com/questions/15779185/list-all-the-functions-symbols-on-the-fly-in-c-code-on-a-linux-architecture -void collectSymbolsFromProgramHeaders(dl_phdr_info * info, std::vector & symbols) +void collectSymbolsFromProgramHeaders(dl_phdr_info * info, + std::vector & symbols) { /* Iterate over all headers of the current shared lib * (first call is for the executable itself) */ @@ -40,8 +41,6 @@ void collectSymbolsFromProgramHeaders(dl_phdr_info * info, std::vector(info->dlpi_addr + info->dlpi_phdr[header_index].p_vaddr); -// std::cerr << "dlpi_addr: " << info->dlpi_addr << "\n"; - /// For unknown reason, addresses are sometimes relative sometimes absolute. auto correct_address = [](ElfW(Addr) base, ElfW(Addr) ptr) { @@ -53,25 +52,17 @@ void collectSymbolsFromProgramHeaders(dl_phdr_info * info, std::vectord_tag != DT_NULL; ++it) - std::cerr << it->d_tag << "\n";*/ - size_t sym_cnt = 0; for (auto it = dyn_begin; it->d_tag != DT_NULL; ++it) { if (it->d_tag == DT_HASH) { const ElfW(Word) * hash = reinterpret_cast(correct_address(info->dlpi_addr, it->d_un.d_ptr)); - -// std::cerr << it->d_un.d_ptr << ", " << it->d_un.d_val << "\n"; - sym_cnt = hash[1]; break; } else if (it->d_tag == DT_GNU_HASH) { -// std::cerr << it->d_un.d_ptr << ", " << it->d_un.d_val << "\n"; - /// This code based on Musl-libc. const uint32_t * buckets = nullptr; @@ -100,7 +91,6 @@ void collectSymbolsFromProgramHeaders(dl_phdr_info * info, std::vectord_tag != DT_NULL; ++it) { if (it->d_tag == DT_SYMTAB) @@ -141,8 +129,6 @@ void collectSymbolsFromProgramHeaders(dl_phdr_info * info, std::vector(info->dlpi_addr + elf_sym[sym_index].st_value); symbol.address_end = reinterpret_cast(info->dlpi_addr + elf_sym[sym_index].st_value + elf_sym[sym_index].st_size); @@ -226,7 +212,9 @@ bool searchAndCollectSymbolsFromELFSymbolTable( } -void collectSymbolsFromELF(dl_phdr_info * info, std::vector & symbols) +void collectSymbolsFromELF(dl_phdr_info * info, + std::vector & symbols, + std::vector & objects) { std::string object_name = info->dlpi_name; @@ -244,6 +232,12 @@ void collectSymbolsFromELF(dl_phdr_info * info, std::vector(info->dlpi_addr); + object.address_end = reinterpret_cast(info->dlpi_addr + elf.size()); + object.name = object_name; + objects.push_back(std::move(object)); + searchAndCollectSymbolsFromELFSymbolTable(info, elf, SHT_SYMTAB, ".strtab", symbols); searchAndCollectSymbolsFromELFSymbolTable(info, elf, SHT_DYNSYM, ".dynstr", symbols); } @@ -253,21 +247,41 @@ void collectSymbolsFromELF(dl_phdr_info * info, std::vector & symbols = *reinterpret_cast *>(out_symbols); + DB::SymbolIndex::Data & data = *reinterpret_cast(data_ptr); - collectSymbolsFromProgramHeaders(info, symbols); - collectSymbolsFromELF(info, symbols); + collectSymbolsFromProgramHeaders(info, data.symbols); + collectSymbolsFromELF(info, data.symbols, data.objects); /* Continue iterations */ return 0; } + +template +const T * find(const void * address, const std::vector & vec) +{ + /// First range that has left boundary greater than address. + + auto it = std::lower_bound(vec.begin(), vec.end(), address, + [](const T & symbol, const void * addr) { return symbol.address_begin <= addr; }); + + if (it == vec.begin()) + return nullptr; + else + --it; /// Last range that has left boundary less or equals than address. + + if (address >= it->address_begin && address < it->address_end) + return &*it; + else + return nullptr; +} + } @@ -276,28 +290,18 @@ namespace DB void SymbolIndex::update() { - dl_iterate_phdr(collectSymbols, &symbols); - std::sort(symbols.begin(), symbols.end()); + dl_iterate_phdr(collectSymbols, &data.symbols); + std::sort(data.symbols.begin(), data.symbols.end(), [](const Symbol & a, const Symbol & b) { return a.address_begin < b.address_begin; }); } -const SymbolIndex::Symbol * SymbolIndex::find(const void * address) const +const SymbolIndex::Symbol * SymbolIndex::findSymbol(const void * address) const { - /// First range that has left boundary greater than address. + return find(address, data.symbols); +} -// std::cerr << "Searching " << address << "\n"; - - auto it = std::lower_bound(symbols.begin(), symbols.end(), address); - if (it == symbols.begin()) - return nullptr; - else - --it; /// Last range that has left boundary less or equals than address. - -// std::cerr << "Range: " << it->address_begin << " ... " << it->address_end << "\n"; - - if (address >= it->address_begin && address < it->address_end) - return &*it; - else - return nullptr; +const SymbolIndex::Object * SymbolIndex::findObject(const void * address) const +{ + return find(address, data.objects); } } diff --git a/dbms/src/Common/SymbolIndex.h b/dbms/src/Common/SymbolIndex.h index 41c7a10648..9d1dceb2c9 100644 --- a/dbms/src/Common/SymbolIndex.h +++ b/dbms/src/Common/SymbolIndex.h @@ -19,21 +19,31 @@ public: const void * address_end; const char * object; std::string name; /// demangled NOTE Can use Arena for strings + }; - bool operator< (const Symbol & rhs) const { return address_begin < rhs.address_begin; } - bool operator< (const void * addr) const { return address_begin <= addr; } + struct Object + { + const void * address_begin; + const void * address_end; + std::string name; }; SymbolIndex() { update(); } void update(); - const Symbol * find(const void * address) const; + const Symbol * findSymbol(const void * address) const; + const Object * findObject(const void * address) const; - auto begin() const { return symbols.cbegin(); } - auto end() const { return symbols.cend(); } + const std::vector & symbols() const { return data.symbols; } + const std::vector & objects() const { return data.objects; } + struct Data + { + std::vector symbols; + std::vector objects; + }; private: - std::vector symbols; + Data data; }; } diff --git a/dbms/src/Common/tests/symbol_index.cpp b/dbms/src/Common/tests/symbol_index.cpp index a9fec7069e..37a044939b 100644 --- a/dbms/src/Common/tests/symbol_index.cpp +++ b/dbms/src/Common/tests/symbol_index.cpp @@ -1,4 +1,6 @@ #include +#include +#include #include #include #include @@ -18,22 +20,31 @@ int main(int argc, char ** argv) SymbolIndex symbol_index; - for (const auto & symbol : symbol_index) - std::cout << symbol.name << ": " << symbol.address_begin << " ... " << symbol.address_end << "\n"; + for (const auto & elem : symbol_index.objects()) + std::cout << elem.name << ": " << elem.address_begin << " ... " << elem.address_end << "\n"; const void * address = reinterpret_cast(std::stoull(argv[1], nullptr, 16)); - auto symbol = symbol_index.find(address); + auto symbol = symbol_index.findSymbol(address); if (symbol) std::cerr << symbol->name << ": " << symbol->address_begin << " ... " << symbol->address_end << "\n"; else - std::cerr << "Not found\n"; + std::cerr << "SymbolIndex: Not found\n"; Dl_info info; if (dladdr(address, &info) && info.dli_sname) std::cerr << demangle(info.dli_sname) << ": " << info.dli_saddr << "\n"; else - std::cerr << "Not found\n"; + std::cerr << "dladdr: Not found\n"; + + Elf elf("/proc/self/exe"); + Dwarf dwarf(elf); + + Dwarf::LocationInfo location; + if (dwarf.findAddress(uintptr_t(address), location, Dwarf::LocationInfoMode::FULL)) + std::cerr << location.file.toString() << ":" << location.line << "\n"; + else + std::cerr << "Dwarf: Not found\n"; return 0; } diff --git a/dbms/src/Functions/symbolizeAddress.cpp b/dbms/src/Functions/symbolizeAddress.cpp index 65c1aa84d3..b4fef64981 100644 --- a/dbms/src/Functions/symbolizeAddress.cpp +++ b/dbms/src/Functions/symbolizeAddress.cpp @@ -73,7 +73,7 @@ public: for (size_t i = 0; i < input_rows_count; ++i) { - if (const auto * symbol = symbol_index.find(reinterpret_cast(data[i]))) + if (const auto * symbol = symbol_index.findSymbol(reinterpret_cast(data[i]))) result_column->insertDataWithTerminatingZero(symbol->name.data(), symbol->name.size() + 1); else result_column->insertDefault();