Replace HashStringUsingDJB with llvm::djbHash

Summary:
The llvm function is equivalent to this one. Where possible I tried to
replace const char* with llvm::StringRef to avoid extra strlen
computations. In most places, I was able to track the c string back to
the ConstString it was created from.

I also create a test that verifies we are able to lookup names with
unicode characters, as a bug in the llvm compiler (it accidentally used
a different hash function) meant this was not working until recently.

This also removes the unused ExportTable class.

Reviewers: aprantl, davide

Subscribers: JDevlieghere, lldb-commits

Differential Revision: https://reviews.llvm.org/D43596

llvm-svn: 325927
This commit is contained in:
Pavel Labath 2018-02-23 17:49:26 +00:00
parent 523c656e25
commit b39fca958d
8 changed files with 81 additions and 222 deletions

View File

@ -24,6 +24,7 @@
// Project includes
#include "lldb/Utility/DataExtractor.h"
#include "lldb/Utility/Stream.h"
#include "llvm/Support/DJB.h"
class MappedHash {
public:
@ -32,22 +33,10 @@ public:
// by the ELF GNU_HASH sections
};
static uint32_t HashStringUsingDJB(const char *s) {
uint32_t h = 5381;
for (unsigned char c = *s; c; c = *++s)
h = ((h << 5) + h) + c;
return h;
}
static uint32_t HashString(uint32_t hash_function, const char *s) {
if (!s)
return 0;
static uint32_t HashString(uint32_t hash_function, llvm::StringRef s) {
switch (hash_function) {
case MappedHash::eHashFunctionDJB:
return HashStringUsingDJB(s);
return llvm::djbHash(s);
default:
break;
@ -152,164 +141,6 @@ public:
// Write (int fd);
};
template <typename __KeyType, class __HeaderDataType, class __ValueType>
class ExportTable {
public:
typedef __HeaderDataType HeaderDataType;
typedef Header<HeaderDataType> HeaderType;
typedef __KeyType KeyType;
typedef __ValueType ValueType;
struct Entry {
uint32_t hash;
KeyType key;
ValueType value;
};
typedef std::vector<ValueType> ValueArrayType;
typedef std::map<KeyType, ValueArrayType> HashData;
// Map a name hash to one or more name infos
typedef std::map<uint32_t, HashData> HashToHashData;
virtual KeyType GetKeyForStringType(const char *cstr) const = 0;
virtual size_t GetByteSize(const HashData &key_to_key_values) = 0;
virtual bool WriteHashData(const HashData &hash_data,
lldb_private::Stream &ostrm) = 0;
//
void AddEntry(const char *cstr, const ValueType &value) {
Entry entry;
entry.hash = MappedHash::HashString(eHashFunctionDJB, cstr);
entry.key = GetKeyForStringType(cstr);
entry.value = value;
m_entries.push_back(entry);
}
void Save(const HeaderDataType &header_data, lldb_private::Stream &ostrm) {
if (m_entries.empty())
return;
const uint32_t num_entries = m_entries.size();
uint32_t i = 0;
HeaderType header;
header.magic = HASH_MAGIC;
header.version = 1;
header.hash_function = eHashFunctionDJB;
header.bucket_count = 0;
header.hashes_count = 0;
header.prologue_length = header_data.GetByteSize();
// We need to figure out the number of unique hashes first before we can
// calculate the number of buckets we want to use.
typedef std::vector<uint32_t> hash_coll;
hash_coll unique_hashes;
unique_hashes.resize(num_entries);
for (i = 0; i < num_entries; ++i)
unique_hashes[i] = m_entries[i].hash;
std::sort(unique_hashes.begin(), unique_hashes.end());
hash_coll::iterator pos =
std::unique(unique_hashes.begin(), unique_hashes.end());
const size_t num_unique_hashes =
std::distance(unique_hashes.begin(), pos);
if (num_unique_hashes > 1024)
header.bucket_count = num_unique_hashes / 4;
else if (num_unique_hashes > 16)
header.bucket_count = num_unique_hashes / 2;
else
header.bucket_count = num_unique_hashes;
if (header.bucket_count == 0)
header.bucket_count = 1;
std::vector<HashToHashData> hash_buckets;
std::vector<uint32_t> hash_indexes(header.bucket_count, 0);
std::vector<uint32_t> hash_values;
std::vector<uint32_t> hash_offsets;
hash_buckets.resize(header.bucket_count);
uint32_t bucket_entry_empties = 0;
// StreamString hash_file_data(Stream::eBinary,
// dwarf->GetObjectFile()->GetAddressByteSize(),
// dwarf->GetObjectFile()->GetByteSize());
// Push all of the hashes into their buckets and create all bucket
// entries all populated with data.
for (i = 0; i < num_entries; ++i) {
const uint32_t hash = m_entries[i].hash;
const uint32_t bucket_idx = hash % header.bucket_count;
const uint32_t strp_offset = m_entries[i].str_offset;
const uint32_t die_offset = m_entries[i].die_offset;
hash_buckets[bucket_idx][hash][strp_offset].push_back(die_offset);
}
// Now for each bucket we write the bucket value which is the
// number of hashes and the hash index encoded into a single
// 32 bit unsigned integer.
for (i = 0; i < header.bucket_count; ++i) {
HashToHashData &bucket_entry = hash_buckets[i];
if (bucket_entry.empty()) {
// Empty bucket
++bucket_entry_empties;
hash_indexes[i] = UINT32_MAX;
} else {
const uint32_t hash_value_index = hash_values.size();
uint32_t hash_count = 0;
typename HashToHashData::const_iterator pos, end = bucket_entry.end();
for (pos = bucket_entry.begin(); pos != end; ++pos) {
hash_values.push_back(pos->first);
hash_offsets.push_back(GetByteSize(pos->second));
++hash_count;
}
hash_indexes[i] = hash_value_index;
}
}
header.hashes_count = hash_values.size();
// Write the header out now that we have the hash_count
header.Write(ostrm);
// Now for each bucket we write the start index of the hashes
// for the current bucket, or UINT32_MAX if the bucket is empty
for (i = 0; i < header.bucket_count; ++i) {
ostrm.PutHex32(hash_indexes[i]);
}
// Now we need to write out all of the hash values
for (i = 0; i < header.hashes_count; ++i) {
ostrm.PutHex32(hash_values[i]);
}
// Now we need to write out all of the hash data offsets,
// there is an offset for each hash in the hashes array
// that was written out above
for (i = 0; i < header.hashes_count; ++i) {
ostrm.PutHex32(hash_offsets[i]);
}
// Now we write the data for each hash and verify we got the offset
// correct above...
for (i = 0; i < header.bucket_count; ++i) {
HashToHashData &bucket_entry = hash_buckets[i];
typename HashToHashData::const_iterator pos, end = bucket_entry.end();
for (pos = bucket_entry.begin(); pos != end; ++pos) {
if (!bucket_entry.empty()) {
WriteHashData(pos->second);
}
}
}
}
protected:
typedef std::vector<Entry> collection;
collection m_entries;
};
// A class for reading and using a saved hash table from a block of data
// in memory
template <typename __KeyType, class __HeaderType, class __HashData>
@ -377,8 +208,8 @@ public:
return result;
}
bool Find(const char *name, Pair &pair) const {
if (!name || !name[0])
bool Find(llvm::StringRef name, Pair &pair) const {
if (name.empty())
return false;
if (IsValid()) {
@ -452,7 +283,7 @@ public:
// should be returned. If anything else goes wrong during parsing,
// return "eResultError" and the corresponding "Find()" function will
// be canceled and return false.
virtual Result GetHashDataForName(const char *name,
virtual Result GetHashDataForName(llvm::StringRef name,
lldb::offset_t *hash_data_offset_ptr,
Pair &pair) const = 0;

View File

@ -0,0 +1,6 @@
LEVEL = ../../../make
C_SOURCES := main.c
CFLAGS_EXTRAS += -finput-charset=UTF-8 -fextended-identifiers -std=c99
include $(LEVEL)/Makefile.rules

View File

@ -0,0 +1,19 @@
# coding=utf8
import lldb
from lldbsuite.test.lldbtest import *
import lldbsuite.test.lldbutil as lldbutil
class TestUnicodeSymbols(TestBase):
mydir = TestBase.compute_mydir(__file__)
def test_union_members(self):
self.build()
spec = lldb.SBModuleSpec()
spec.SetFileSpec(lldb.SBFileSpec(self.getBuildArtifact("a.out")))
module = lldb.SBModule(spec)
self.assertTrue(module.IsValid())
mytype = module.FindFirstType("foobár")
self.assertTrue(mytype.IsValid())
self.assertTrue(mytype.IsPointerType())

View File

@ -0,0 +1,5 @@
typedef void *foob\u00E1r;
foob\u00E1r X = 0;
int main() {
return (long)X;
}

View File

@ -379,7 +379,8 @@ bool DWARFMappedHash::MemoryTable::ReadHashData(uint32_t hash_data_offset,
DWARFMappedHash::MemoryTable::Result
DWARFMappedHash::MemoryTable::GetHashDataForName(
const char *name, lldb::offset_t *hash_data_offset_ptr, Pair &pair) const {
llvm::StringRef name, lldb::offset_t *hash_data_offset_ptr,
Pair &pair) const {
pair.key = m_data.GetU32(hash_data_offset_ptr);
pair.value.clear();
@ -406,7 +407,7 @@ DWARFMappedHash::MemoryTable::GetHashDataForName(
// data to parse at least "count" HashData entries.
// First make sure the entire C string matches...
const bool match = strcmp(name, strp_cstr) == 0;
const bool match = name == strp_cstr;
if (!match && m_header.header_data.HashDataHasFixedByteSize()) {
// If the string doesn't match and we have fixed size data,
@ -573,9 +574,9 @@ size_t DWARFMappedHash::MemoryTable::AppendAllDIEsInRange(
return die_info_array.size();
}
size_t DWARFMappedHash::MemoryTable::FindByName(const char *name,
size_t DWARFMappedHash::MemoryTable::FindByName(llvm::StringRef name,
DIEArray &die_offsets) {
if (!name || !name[0])
if (name.empty())
return 0;
DIEInfoArray die_info_array;
@ -584,7 +585,7 @@ size_t DWARFMappedHash::MemoryTable::FindByName(const char *name,
return die_info_array.size();
}
size_t DWARFMappedHash::MemoryTable::FindByNameAndTag(const char *name,
size_t DWARFMappedHash::MemoryTable::FindByNameAndTag(llvm::StringRef name,
const dw_tag_t tag,
DIEArray &die_offsets) {
DIEInfoArray die_info_array;
@ -594,8 +595,8 @@ size_t DWARFMappedHash::MemoryTable::FindByNameAndTag(const char *name,
}
size_t DWARFMappedHash::MemoryTable::FindByNameAndTagAndQualifiedNameHash(
const char *name, const dw_tag_t tag, const uint32_t qualified_name_hash,
DIEArray &die_offsets) {
llvm::StringRef name, const dw_tag_t tag,
const uint32_t qualified_name_hash, DIEArray &die_offsets) {
DIEInfoArray die_info_array;
if (FindByName(name, die_info_array))
DWARFMappedHash::ExtractDIEArray(die_info_array, tag, qualified_name_hash,
@ -604,7 +605,7 @@ size_t DWARFMappedHash::MemoryTable::FindByNameAndTagAndQualifiedNameHash(
}
size_t DWARFMappedHash::MemoryTable::FindCompleteObjCClassByName(
const char *name, DIEArray &die_offsets, bool must_be_implementation) {
llvm::StringRef name, DIEArray &die_offsets, bool must_be_implementation) {
DIEInfoArray die_info_array;
if (FindByName(name, die_info_array)) {
if (must_be_implementation &&
@ -628,9 +629,9 @@ size_t DWARFMappedHash::MemoryTable::FindCompleteObjCClassByName(
return die_offsets.size();
}
size_t DWARFMappedHash::MemoryTable::FindByName(const char *name,
size_t DWARFMappedHash::MemoryTable::FindByName(llvm::StringRef name,
DIEInfoArray &die_info_array) {
if (!name || !name[0])
if (name.empty())
return 0;
Pair kv_pair;

View File

@ -132,17 +132,17 @@ public:
const uint32_t die_offset_end,
DIEInfoArray &die_info_array) const;
size_t FindByName(const char *name, DIEArray &die_offsets);
size_t FindByName(llvm::StringRef name, DIEArray &die_offsets);
size_t FindByNameAndTag(const char *name, const dw_tag_t tag,
size_t FindByNameAndTag(llvm::StringRef name, const dw_tag_t tag,
DIEArray &die_offsets);
size_t
FindByNameAndTagAndQualifiedNameHash(const char *name, const dw_tag_t tag,
const uint32_t qualified_name_hash,
DIEArray &die_offsets);
size_t FindByNameAndTagAndQualifiedNameHash(
llvm::StringRef name, const dw_tag_t tag,
const uint32_t qualified_name_hash, DIEArray &die_offsets);
size_t FindCompleteObjCClassByName(const char *name, DIEArray &die_offsets,
size_t FindCompleteObjCClassByName(llvm::StringRef name,
DIEArray &die_offsets,
bool must_be_implementation);
protected:
@ -150,9 +150,9 @@ public:
const lldb_private::RegularExpression &regex,
lldb::offset_t *hash_data_offset_ptr, Pair &pair) const;
size_t FindByName(const char *name, DIEInfoArray &die_info_array);
size_t FindByName(llvm::StringRef name, DIEInfoArray &die_info_array);
Result GetHashDataForName(const char *name,
Result GetHashDataForName(llvm::StringRef name,
lldb::offset_t *hash_data_offset_ptr,
Pair &pair) const override;

View File

@ -1510,7 +1510,8 @@ size_t SymbolFileDWARF::GetObjCMethodDIEOffsets(ConstString class_name,
method_die_offsets.clear();
if (m_using_apple_tables) {
if (m_apple_objc_ap.get())
m_apple_objc_ap->FindByName(class_name.GetCString(), method_die_offsets);
m_apple_objc_ap->FindByName(class_name.GetStringRef(),
method_die_offsets);
} else {
if (!m_indexed)
Index();
@ -2183,7 +2184,7 @@ uint32_t SymbolFileDWARF::FindGlobalVariables(
basename))
basename = name_cstr;
m_apple_names_ap->FindByName(basename.data(), die_offsets);
m_apple_names_ap->FindByName(basename, die_offsets);
}
} else {
// Index the DWARF if we haven't already
@ -2489,8 +2490,6 @@ SymbolFileDWARF::FindFunctions(const ConstString &name,
// Remember how many sc_list are in the list before we search in case
// we are appending the results to a variable list.
const char *name_cstr = name.GetCString();
const uint32_t original_size = sc_list.GetSize();
DWARFDebugInfo *info = DebugInfo();
@ -2511,7 +2510,8 @@ SymbolFileDWARF::FindFunctions(const ConstString &name,
// want to canonicalize this (strip double spaces, etc. For now, we
// just add all the
// dies that we find by exact match.
num_matches = m_apple_names_ap->FindByName(name_cstr, die_offsets);
num_matches =
m_apple_names_ap->FindByName(name.GetStringRef(), die_offsets);
for (uint32_t i = 0; i < num_matches; i++) {
const DIERef &die_ref = die_offsets[i];
DWARFDIE die = info->GetDIE(die_ref);
@ -2527,7 +2527,7 @@ SymbolFileDWARF::FindFunctions(const ConstString &name,
GetObjectFile()->GetModule()->ReportErrorIfModifyDetected(
"the DWARF debug information has been modified (.apple_names "
"accelerator table had bad die 0x%8.8x for '%s')",
die_ref.die_offset, name_cstr);
die_ref.die_offset, name.GetCString());
}
}
}
@ -2536,7 +2536,8 @@ SymbolFileDWARF::FindFunctions(const ConstString &name,
if (parent_decl_ctx && parent_decl_ctx->IsValid())
return 0; // no selectors in namespaces
num_matches = m_apple_names_ap->FindByName(name_cstr, die_offsets);
num_matches =
m_apple_names_ap->FindByName(name.GetStringRef(), die_offsets);
// Now make sure these are actually ObjC methods. In this case we can
// simply look up the name,
// and if it is an ObjC method name, we're good.
@ -2556,7 +2557,7 @@ SymbolFileDWARF::FindFunctions(const ConstString &name,
GetObjectFile()->GetModule()->ReportError(
"the DWARF debug information has been modified (.apple_names "
"accelerator table had bad die 0x%8.8x for '%s')",
die_ref.die_offset, name_cstr);
die_ref.die_offset, name.GetCString());
}
}
die_offsets.clear();
@ -2572,7 +2573,8 @@ SymbolFileDWARF::FindFunctions(const ConstString &name,
// FIXME: Arrange the logic above so that we don't calculate the base
// name twice:
num_matches = m_apple_names_ap->FindByName(name_cstr, die_offsets);
num_matches =
m_apple_names_ap->FindByName(name.GetStringRef(), die_offsets);
for (uint32_t i = 0; i < num_matches; i++) {
const DIERef &die_ref = die_offsets[i];
@ -2626,7 +2628,7 @@ SymbolFileDWARF::FindFunctions(const ConstString &name,
GetObjectFile()->GetModule()->ReportErrorIfModifyDetected(
"the DWARF debug information has been modified (.apple_names "
"accelerator table had bad die 0x%8.8x for '%s')",
die_ref.die_offset, name_cstr);
die_ref.die_offset, name.GetCString());
}
}
die_offsets.clear();
@ -2850,8 +2852,7 @@ uint32_t SymbolFileDWARF::FindTypes(
if (m_using_apple_tables) {
if (m_apple_types_ap.get()) {
const char *name_cstr = name.GetCString();
m_apple_types_ap->FindByName(name_cstr, die_offsets);
m_apple_types_ap->FindByName(name.GetStringRef(), die_offsets);
}
} else {
if (!m_indexed)
@ -2944,8 +2945,7 @@ size_t SymbolFileDWARF::FindTypes(const std::vector<CompilerContext> &context,
if (m_using_apple_tables) {
if (m_apple_types_ap.get()) {
const char *name_cstr = name.GetCString();
m_apple_types_ap->FindByName(name_cstr, die_offsets);
m_apple_types_ap->FindByName(name.GetStringRef(), die_offsets);
}
} else {
if (!m_indexed)
@ -3013,8 +3013,7 @@ SymbolFileDWARF::FindNamespace(const SymbolContext &sc, const ConstString &name,
// get indexed and make their global DIE index list
if (m_using_apple_tables) {
if (m_apple_namespaces_ap.get()) {
const char *name_cstr = name.GetCString();
m_apple_namespaces_ap->FindByName(name_cstr, die_offsets);
m_apple_namespaces_ap->FindByName(name.GetStringRef(), die_offsets);
}
} else {
if (!m_indexed)
@ -3207,9 +3206,8 @@ TypeSP SymbolFileDWARF::FindCompleteObjCDefinitionTypeForDIE(
if (m_using_apple_tables) {
if (m_apple_types_ap.get()) {
const char *name_cstr = type_name.GetCString();
m_apple_types_ap->FindCompleteObjCClassByName(name_cstr, die_offsets,
must_be_implementation);
m_apple_types_ap->FindCompleteObjCClassByName(
type_name.GetStringRef(), die_offsets, must_be_implementation);
}
} else {
if (!m_indexed)
@ -3398,21 +3396,21 @@ TypeSP SymbolFileDWARF::FindDefinitionTypeForDWARFDeclContext(
DWARFMappedHash::eAtomTypeQualNameHash);
if (has_tag && has_qualified_name_hash) {
const char *qualified_name = dwarf_decl_ctx.GetQualifiedName();
const uint32_t qualified_name_hash =
MappedHash::HashStringUsingDJB(qualified_name);
const uint32_t qualified_name_hash = llvm::djbHash(qualified_name);
if (log)
GetObjectFile()->GetModule()->LogMessage(
log, "FindByNameAndTagAndQualifiedNameHash()");
m_apple_types_ap->FindByNameAndTagAndQualifiedNameHash(
type_name.GetCString(), tag, qualified_name_hash, die_offsets);
type_name.GetStringRef(), tag, qualified_name_hash,
die_offsets);
} else if (has_tag) {
if (log)
GetObjectFile()->GetModule()->LogMessage(log,
"FindByNameAndTag()");
m_apple_types_ap->FindByNameAndTag(type_name.GetCString(), tag,
m_apple_types_ap->FindByNameAndTag(type_name.GetStringRef(), tag,
die_offsets);
} else {
m_apple_types_ap->FindByName(type_name.GetCString(), die_offsets);
m_apple_types_ap->FindByName(type_name.GetStringRef(), die_offsets);
}
}
} else {

View File

@ -23,6 +23,7 @@
#include "lldb/Utility/Timer.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/DJB.h"
using namespace lldb;
using namespace lldb_private;
@ -45,8 +46,7 @@ bool ObjCLanguageRuntime::AddClass(ObjCISA isa,
if (isa != 0) {
m_isa_to_descriptor[isa] = descriptor_sp;
// class_name is assumed to be valid
m_hash_to_isa_map.insert(
std::make_pair(MappedHash::HashStringUsingDJB(class_name), isa));
m_hash_to_isa_map.insert(std::make_pair(llvm::djbHash(class_name), isa));
return true;
}
return false;
@ -180,8 +180,7 @@ ObjCLanguageRuntime::GetDescriptorIterator(const ConstString &name) {
} else {
// Name hashes were provided, so use them to efficiently lookup name to
// isa/descriptor
const uint32_t name_hash =
MappedHash::HashStringUsingDJB(name.GetCString());
const uint32_t name_hash = llvm::djbHash(name.GetStringRef());
std::pair<HashToISAIterator, HashToISAIterator> range =
m_hash_to_isa_map.equal_range(name_hash);
for (HashToISAIterator range_pos = range.first; range_pos != range.second;