Refactor PGO function naming and MD5 hashing support out of ProfileData

Summary:
Move the function renaming logic into the Function class, and the
MD5Hash routine into the MD5 header.

This will enable these routines to be shared with ThinLTO, which
will be changed to store the MD5 hash instead of the full function name
in the combined index for significant size reductions. Using the same
function naming for locals in the function index also facilitates future
integration with indirect call value profiles.

Reviewers: davidxl

Subscribers: llvm-commits

Differential Revision: http://reviews.llvm.org/D17006

llvm-svn: 260197
This commit is contained in:
Teresa Johnson 2016-02-09 05:12:44 +00:00
parent e5fa25a094
commit 833571ecb4
5 changed files with 47 additions and 31 deletions

View File

@ -639,6 +639,14 @@ public:
/// to \a DISubprogram. /// to \a DISubprogram.
DISubprogram *getSubprogram() const; DISubprogram *getSubprogram() const;
/// Return the modified name for a function suitable to be
/// used as the key for a global lookup (e.g. profile or ThinLTO).
///
/// \param FuncName The function's original name. A leading '\1' character,
///                 if present, is not included in the result.
/// \param Linkage  The function's linkage type. When it is a local linkage,
///                 the result is prefixed with "<FileName>:" so that locals
///                 from different modules can be told apart.
/// \param FileName Name of the module the function is defined in. When it is
///                 empty, "<unknown>:" is used as the prefix for locals.
/// \returns The identifier as a newly built std::string.
static std::string getGlobalIdentifier(StringRef FuncName,
GlobalValue::LinkageTypes Linkage,
StringRef FileName);
private: private:
void allocHungoffUselist(); void allocHungoffUselist();
template<int Idx> void setHungoffOperand(Constant *C); template<int Idx> void setHungoffOperand(Constant *C);

View File

@ -626,21 +626,10 @@ enum class HashT : uint32_t {
Last = MD5 Last = MD5
}; };
// Helper to compute and return a 64-bit hash of Str derived from its MD5
// digest.
static inline uint64_t MD5Hash(StringRef Str) {
MD5 Hash;
// Feed the whole string into the hasher, then extract the digest.
Hash.update(Str);
llvm::MD5::MD5Result Result;
Hash.final(Result);
// Return the least significant 8 bytes. Our MD5 implementation returns the
// result in little endian, so we may need to swap bytes.
// The unaligned little-endian read below performs that conversion to the
// host representation.
using namespace llvm::support;
return endian::read<uint64_t, little, unaligned>(Result);
}
inline uint64_t ComputeHash(HashT Type, StringRef K) { inline uint64_t ComputeHash(HashT Type, StringRef K) {
switch (Type) { switch (Type) {
case HashT::MD5: case HashT::MD5:
return IndexedInstrProf::MD5Hash(K); return MD5Hash(K);
} }
llvm_unreachable("Unhandled hash type"); llvm_unreachable("Unhandled hash type");
} }

View File

@ -31,6 +31,7 @@
#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallString.h"
#include "llvm/Support/DataTypes.h" #include "llvm/Support/DataTypes.h"
#include "llvm/Support/Endian.h"
namespace llvm { namespace llvm {
@ -65,6 +66,18 @@ private:
const uint8_t *body(ArrayRef<uint8_t> Data); const uint8_t *body(ArrayRef<uint8_t> Data);
}; };
/// Compute a 64-bit hash of \p Str derived from its MD5 digest.
///
/// The full MD5 digest of the string is computed first; the value returned
/// is a 64-bit little-endian read from the start of that digest.
inline uint64_t MD5Hash(StringRef Str) {
  MD5 Hasher;
  Hasher.update(Str);

  llvm::MD5::MD5Result Digest;
  Hasher.final(Digest);

  // Only the least significant 8 bytes are returned. The MD5 implementation
  // produces its result in little endian, so the read below swaps bytes when
  // the host requires it.
  return llvm::support::endian::read<uint64_t, llvm::support::little,
                                     llvm::support::unaligned>(Digest);
}
} }
#endif #endif

View File

@ -997,3 +997,27 @@ Optional<uint64_t> Function::getEntryCount() const {
} }
return None; return None;
} }
/// Build the name used as the key for a global lookup (e.g. profile or
/// ThinLTO) of a function.
///
/// \param FuncName The function's original name. A leading '\1' character,
///                 if present, is dropped. An empty name is accepted.
/// \param Linkage  The function's linkage type. For local linkage the
///                 result is prefixed with the file name.
/// \param FileName Name of the module defining the function. It is used
///                 exactly as given; "<unknown>" is substituted when empty.
/// \returns The identifier as a newly built std::string.
std::string Function::getGlobalIdentifier(StringRef FuncName,
                                          GlobalValue::LinkageTypes Linkage,
                                          StringRef FileName) {
  // Function names may be prefixed with a '\1' character to indicate
  // that the backend should not modify the symbols due to any platform
  // naming convention. Do not include that character in the identifier.
  // Guard against an empty name first: indexing element 0 of an empty
  // StringRef is out of bounds.
  if (!FuncName.empty() && FuncName[0] == '\1')
    FuncName = FuncName.substr(1);

  std::string NewFuncName = FuncName.str();
  if (llvm::GlobalValue::isLocalLinkage(Linkage)) {
    // For local symbols, prepend the main file name to distinguish them.
    // The file name should not include the full path since there's no
    // guarantee that it will stay the same, e.g., if the files are checked
    // out from version control in different locations. Note that no path
    // stripping happens here; FileName is prepended as passed in.
    if (FileName.empty())
      NewFuncName.insert(0, "<unknown>:");
    else
      NewFuncName.insert(0, FileName.str() + ":");
  }
  return NewFuncName;
}

View File

@ -80,25 +80,7 @@ std::string getPGOFuncName(StringRef RawFuncName,
GlobalValue::LinkageTypes Linkage, GlobalValue::LinkageTypes Linkage,
StringRef FileName, StringRef FileName,
uint64_t Version LLVM_ATTRIBUTE_UNUSED) { uint64_t Version LLVM_ATTRIBUTE_UNUSED) {
return Function::getGlobalIdentifier(RawFuncName, Linkage, FileName);
// Function names may be prefixed with a binary '1' to indicate
// that the backend should not modify the symbols due to any platform
// naming convention. Do not include that '1' in the PGO profile name.
if (RawFuncName[0] == '\1')
RawFuncName = RawFuncName.substr(1);
std::string FuncName = RawFuncName;
if (llvm::GlobalValue::isLocalLinkage(Linkage)) {
// For local symbols, prepend the main file name to distinguish them.
// Do not include the full path in the file name since there's no guarantee
// that it will stay the same, e.g., if the files are checked out from
// version control in different locations.
if (FileName.empty())
FuncName = FuncName.insert(0, "<unknown>:");
else
FuncName = FuncName.insert(0, FileName.str() + ":");
}
return FuncName;
} }
std::string getPGOFuncName(const Function &F, uint64_t Version) { std::string getPGOFuncName(const Function &F, uint64_t Version) {