Restore "[ThinLTO] Use MD5 hash in function index." with fix

This restores commit r260408, along with a fix for a bot failure.

The bot failure was caused by dereferencing a unique_ptr in the same
call instruction parameter list where it was passed via std::move.
Apparently due to luck this was not exposed when I built the compiler
with clang, only with gcc.

llvm-svn: 260442
This commit is contained in:
Teresa Johnson 2016-02-10 21:55:02 +00:00
parent 3e58a6a7b2
commit e1164de5d0
14 changed files with 167 additions and 104 deletions

View File

@ -105,6 +105,9 @@ enum { BITCODE_CURRENT_EPOCH = 0 };
// METADATA_VALUES: [numvals]
MODULE_CODE_METADATA_VALUES = 15,
// SOURCE_FILENAME: [namechar x N]
MODULE_CODE_SOURCE_FILENAME = 16,
};
/// PARAMATTR blocks have code for defining a parameter attribute set.
@ -172,7 +175,7 @@ enum { BITCODE_CURRENT_EPOCH = 0 };
VST_CODE_ENTRY = 1, // VST_ENTRY: [valueid, namechar x N]
VST_CODE_BBENTRY = 2, // VST_BBENTRY: [bbid, namechar x N]
VST_CODE_FNENTRY = 3, // VST_FNENTRY: [valueid, offset, namechar x N]
// VST_COMBINED_FNENTRY: [offset, namechar x N]
// VST_COMBINED_FNENTRY: [funcsumoffset, funcguid]
VST_CODE_COMBINED_FNENTRY = 4
};

View File

@ -27,6 +27,7 @@
#include "llvm/IR/GlobalObject.h"
#include "llvm/IR/OperandTraits.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/MD5.h"
namespace llvm {
@ -650,6 +651,12 @@ public:
GlobalValue::LinkageTypes Linkage,
StringRef FileName);
/// Return a 64-bit global unique ID constructed from global function name
/// (i.e. returned by getGlobalIdentifier).
static uint64_t getGUID(StringRef GlobalFuncName) {
return MD5Hash(GlobalFuncName);
}
private:
void allocHungoffUselist();
template<int Idx> void setHungoffOperand(Constant *C);

View File

@ -18,6 +18,7 @@
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
@ -146,8 +147,12 @@ public:
/// COMDAT functions of the same name.
typedef std::vector<std::unique_ptr<FunctionInfo>> FunctionInfoList;
/// Map from function name to corresponding function info structures.
typedef StringMap<FunctionInfoList> FunctionInfoMapTy;
/// Map from function GUID to corresponding function info structures.
/// Use a std::map rather than a DenseMap since it will likely incur
/// less overhead, as the value type is not very small and the size
/// of the map is unknown, resulting in inefficiencies due to repeated
/// insertions and resizing.
typedef std::map<uint64_t, FunctionInfoList> FunctionInfoMapTy;
/// Type used for iterating through the function info map.
typedef FunctionInfoMapTy::const_iterator const_funcinfo_iterator;
@ -184,17 +189,21 @@ public:
/// Get the list of function info objects for a given function.
const FunctionInfoList &getFunctionInfoList(StringRef FuncName) {
return FunctionMap[FuncName];
return FunctionMap[Function::getGUID(FuncName)];
}
/// Get the list of function info objects for a given function.
const const_funcinfo_iterator findFunctionInfoList(StringRef FuncName) const {
return FunctionMap.find(FuncName);
return FunctionMap.find(Function::getGUID(FuncName));
}
/// Add a function info for a function of the given name.
void addFunctionInfo(StringRef FuncName, std::unique_ptr<FunctionInfo> Info) {
FunctionMap[FuncName].push_back(std::move(Info));
FunctionMap[Function::getGUID(FuncName)].push_back(std::move(Info));
}
void addFunctionInfo(uint64_t FuncGUID, std::unique_ptr<FunctionInfo> Info) {
FunctionMap[FuncGUID].push_back(std::move(Info));
}
/// Iterator to allow writer to walk through table during emission.

View File

@ -170,6 +170,8 @@ private:
std::unique_ptr<GVMaterializer>
Materializer; ///< Used to materialize GlobalValues
std::string ModuleID; ///< Human readable identifier for the module
std::string SourceFileName; ///< Original source file name for module,
///< recorded in bitcode.
std::string TargetTriple; ///< Platform target triple Module compiled on
///< Format: (arch)(sub)-(vendor)-(sys0-(abi)
void *NamedMDSymTab; ///< NamedMDNode names.
@ -195,6 +197,12 @@ public:
/// @returns the module identifier as a string
const std::string &getModuleIdentifier() const { return ModuleID; }
/// Get the module's original source file name. When compiling from
/// bitcode, this is taken from a bitcode record where it was recorded.
/// For other compiles it is the same as the ModuleID, which would
/// contain the source file name.
const std::string &getSourceFileName() const { return SourceFileName; }
/// \brief Get a short "name" for the module.
///
/// This is useful for debugging or logging. It is essentially a convenience
@ -240,6 +248,9 @@ public:
/// Set the module identifier.
void setModuleIdentifier(StringRef ID) { ModuleID = ID; }
/// Set the module's original source file name.
void setSourceFileName(StringRef Name) { SourceFileName = Name; }
/// Set the data layout
void setDataLayout(StringRef Desc);
void setDataLayout(const DataLayout &Other);

View File

@ -458,6 +458,9 @@ class FunctionIndexBitcodeReader {
/// summary records.
DenseMap<uint64_t, StringRef> ModuleIdMap;
/// Original source file name recorded in a bitcode record.
std::string SourceFileName;
public:
std::error_code error(BitcodeError E, const Twine &Message);
std::error_code error(BitcodeError E);
@ -3697,6 +3700,13 @@ std::error_code BitcodeReader::parseModule(uint64_t ResumeBit,
assert(MetadataList.size() == 0);
MetadataList.resize(NumModuleMDs);
break;
/// MODULE_CODE_SOURCE_FILENAME: [namechar x N]
case bitc::MODULE_CODE_SOURCE_FILENAME:
SmallString<128> ValueName;
if (convertToString(Record, 0, ValueName))
return error("Invalid record");
TheModule->setSourceFileName(ValueName);
break;
}
Record.clear();
}
@ -5454,24 +5464,30 @@ std::error_code FunctionIndexBitcodeReader::parseValueSymbolTable() {
return error("Invalid record");
unsigned ValueID = Record[0];
uint64_t FuncOffset = Record[1];
std::unique_ptr<FunctionInfo> FuncInfo =
llvm::make_unique<FunctionInfo>(FuncOffset);
if (foundFuncSummary() && !IsLazy) {
assert(!IsLazy && "Lazy summary read only supported for combined index");
// Gracefully handle bitcode without a function summary section,
// which will simply not populate the index.
if (foundFuncSummary()) {
DenseMap<uint64_t, std::unique_ptr<FunctionSummary>>::iterator SMI =
SummaryMap.find(ValueID);
assert(SMI != SummaryMap.end() && "Summary info not found");
std::unique_ptr<FunctionInfo> FuncInfo =
llvm::make_unique<FunctionInfo>(FuncOffset);
FuncInfo->setFunctionSummary(std::move(SMI->second));
assert(!SourceFileName.empty());
std::string FunctionGlobalId = Function::getGlobalIdentifier(
ValueName, FuncInfo->functionSummary()->getFunctionLinkage(),
SourceFileName);
TheIndex->addFunctionInfo(FunctionGlobalId, std::move(FuncInfo));
}
TheIndex->addFunctionInfo(ValueName, std::move(FuncInfo));
ValueName.clear();
break;
}
case bitc::VST_CODE_COMBINED_FNENTRY: {
// VST_CODE_FNENTRY: [offset, namechar x N]
if (convertToString(Record, 1, ValueName))
return error("Invalid record");
// VST_CODE_COMBINED_FNENTRY: [offset, funcguid]
uint64_t FuncSummaryOffset = Record[0];
uint64_t FuncGUID = Record[1];
std::unique_ptr<FunctionInfo> FuncInfo =
llvm::make_unique<FunctionInfo>(FuncSummaryOffset);
if (foundFuncSummary() && !IsLazy) {
@ -5480,7 +5496,7 @@ std::error_code FunctionIndexBitcodeReader::parseValueSymbolTable() {
assert(SMI != SummaryMap.end() && "Summary info not found");
FuncInfo->setFunctionSummary(std::move(SMI->second));
}
TheIndex->addFunctionInfo(ValueName, std::move(FuncInfo));
TheIndex->addFunctionInfo(FuncGUID, std::move(FuncInfo));
ValueName.clear();
break;
@ -5499,6 +5515,8 @@ std::error_code FunctionIndexBitcodeReader::parseModule() {
if (Stream.EnterSubBlock(bitc::MODULE_BLOCK_ID))
return error("Invalid record");
SmallVector<uint64_t, 64> Record;
// Read the function index for this module.
while (1) {
BitstreamEntry Entry = Stream.advance();
@ -5551,7 +5569,24 @@ std::error_code FunctionIndexBitcodeReader::parseModule() {
continue;
case BitstreamEntry::Record:
Stream.skipRecord(Entry.ID);
// Once we find the single record of interest, skip the rest.
if (!SourceFileName.empty())
Stream.skipRecord(Entry.ID);
else {
Record.clear();
auto BitCode = Stream.readRecord(Entry.ID, Record);
switch (BitCode) {
default:
break; // Default behavior, ignore unknown content.
/// MODULE_CODE_SOURCE_FILENAME: [namechar x N]
case bitc::MODULE_CODE_SOURCE_FILENAME:
SmallString<128> ValueName;
if (convertToString(Record, 0, ValueName))
return error("Invalid record");
SourceFileName = ValueName.c_str();
break;
}
}
continue;
}
}

View File

@ -618,6 +618,24 @@ static uint64_t WriteValueSymbolTableForwardDecl(const ValueSymbolTable &VST,
return Stream.GetCurrentBitNo() - 32;
}
enum StringEncoding { SE_Char6, SE_Fixed7, SE_Fixed8 };
/// Determine the encoding to use for the given string name and length.
static StringEncoding getStringEncoding(const char *Str, unsigned StrLen) {
bool isChar6 = true;
for (const char *C = Str, *E = C + StrLen; C != E; ++C) {
if (isChar6)
isChar6 = BitCodeAbbrevOp::isChar6(*C);
if ((unsigned char)*C & 128)
// don't bother scanning the rest.
return SE_Fixed8;
}
if (isChar6)
return SE_Char6;
else
return SE_Fixed7;
}
/// Emit top-level description of module, including target triple, inline asm,
/// descriptors for global variables, and function prototype info.
/// Returns the bit offset to backpatch with the location of the real VST.
@ -791,13 +809,40 @@ static uint64_t WriteModuleInfo(const Module *M, const ValueEnumerator &VE,
// function importing where we lazy load the metadata as a postpass,
// we want to avoid parsing the module-level metadata before parsing
// the imported functions.
BitCodeAbbrev *Abbv = new BitCodeAbbrev();
Abbv->Add(BitCodeAbbrevOp(bitc::MODULE_CODE_METADATA_VALUES));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6));
unsigned MDValsAbbrev = Stream.EmitAbbrev(Abbv);
Vals.push_back(VE.numMDs());
Stream.EmitRecord(bitc::MODULE_CODE_METADATA_VALUES, Vals, MDValsAbbrev);
Vals.clear();
{
BitCodeAbbrev *Abbv = new BitCodeAbbrev();
Abbv->Add(BitCodeAbbrevOp(bitc::MODULE_CODE_METADATA_VALUES));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6));
unsigned MDValsAbbrev = Stream.EmitAbbrev(Abbv);
Vals.push_back(VE.numMDs());
Stream.EmitRecord(bitc::MODULE_CODE_METADATA_VALUES, Vals, MDValsAbbrev);
Vals.clear();
}
// Emit the module's source file name.
{
StringEncoding Bits =
getStringEncoding(M->getName().data(), M->getName().size());
BitCodeAbbrevOp AbbrevOpToUse = BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8);
if (Bits == SE_Char6)
AbbrevOpToUse = BitCodeAbbrevOp(BitCodeAbbrevOp::Char6);
else if (Bits == SE_Fixed7)
AbbrevOpToUse = BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 7);
// MODULE_CODE_SOURCE_FILENAME: [namechar x N]
BitCodeAbbrev *Abbv = new BitCodeAbbrev();
Abbv->Add(BitCodeAbbrevOp(bitc::MODULE_CODE_SOURCE_FILENAME));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(AbbrevOpToUse);
unsigned FilenameAbbrev = Stream.EmitAbbrev(Abbv);
for (const auto P : M->getSourceFileName())
Vals.push_back((unsigned char)P);
// Emit the finished record.
Stream.EmitRecord(bitc::MODULE_CODE_SOURCE_FILENAME, Vals, FilenameAbbrev);
Vals.clear();
}
uint64_t VSTOffsetPlaceholder =
WriteValueSymbolTableForwardDecl(M->getValueSymbolTable(), Stream);
@ -2195,24 +2240,6 @@ static void WriteInstruction(const Instruction &I, unsigned InstID,
Vals.clear();
}
enum StringEncoding { SE_Char6, SE_Fixed7, SE_Fixed8 };
/// Determine the encoding to use for the given string name and length.
static StringEncoding getStringEncoding(const char *Str, unsigned StrLen) {
bool isChar6 = true;
for (const char *C = Str, *E = C + StrLen; C != E; ++C) {
if (isChar6)
isChar6 = BitCodeAbbrevOp::isChar6(*C);
if ((unsigned char)*C & 128)
// don't bother scanning the rest.
return SE_Fixed8;
}
if (isChar6)
return SE_Char6;
else
return SE_Fixed7;
}
/// Emit names for globals/functions etc. The VSTOffsetPlaceholder,
/// BitcodeStartBit and FunctionIndex are only passed for the module-level
/// VST, where we are including a function bitcode index and need to
@ -2352,51 +2379,24 @@ static void WriteCombinedValueSymbolTable(const FunctionInfoIndex &Index,
BitstreamWriter &Stream) {
Stream.EnterSubblock(bitc::VALUE_SYMTAB_BLOCK_ID, 4);
// 8-bit fixed-width VST_CODE_COMBINED_FNENTRY function strings.
BitCodeAbbrev *Abbv = new BitCodeAbbrev();
Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_COMBINED_FNENTRY));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // funcoffset
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8));
unsigned FnEntry8BitAbbrev = Stream.EmitAbbrev(Abbv);
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // funcsumoffset
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // funcguid
unsigned FnEntryAbbrev = Stream.EmitAbbrev(Abbv);
// 7-bit fixed width VST_CODE_COMBINED_FNENTRY function strings.
Abbv = new BitCodeAbbrev();
Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_COMBINED_FNENTRY));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // funcoffset
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 7));
unsigned FnEntry7BitAbbrev = Stream.EmitAbbrev(Abbv);
// 6-bit char6 VST_CODE_COMBINED_FNENTRY function strings.
Abbv = new BitCodeAbbrev();
Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_COMBINED_FNENTRY));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // funcoffset
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6));
unsigned FnEntry6BitAbbrev = Stream.EmitAbbrev(Abbv);
// FIXME: We know if the type names can use 7-bit ascii.
SmallVector<unsigned, 64> NameVals;
SmallVector<uint64_t, 64> NameVals;
for (const auto &FII : Index) {
for (const auto &FI : FII.getValue()) {
for (const auto &FI : FII.second) {
NameVals.push_back(FI->bitcodeIndex());
StringRef FuncName = FII.first();
uint64_t FuncGUID = FII.first;
// Figure out the encoding to use for the name.
StringEncoding Bits = getStringEncoding(FuncName.data(), FuncName.size());
// VST_CODE_COMBINED_FNENTRY: [funcsumoffset, funcguid]
unsigned AbbrevToUse = FnEntryAbbrev;
// VST_CODE_COMBINED_FNENTRY: [funcsumoffset, namechar x N]
unsigned AbbrevToUse = FnEntry8BitAbbrev;
if (Bits == SE_Char6)
AbbrevToUse = FnEntry6BitAbbrev;
else if (Bits == SE_Fixed7)
AbbrevToUse = FnEntry7BitAbbrev;
for (const auto P : FuncName)
NameVals.push_back((unsigned char)P);
NameVals.push_back(FuncGUID);
// Emit the finished record.
Stream.EmitRecord(bitc::VST_CODE_COMBINED_FNENTRY, NameVals, AbbrevToUse);
@ -2855,7 +2855,7 @@ static void WriteCombinedFunctionSummary(const FunctionInfoIndex &I,
SmallVector<unsigned, 64> NameVals;
for (const auto &FII : I) {
for (auto &FI : FII.getValue()) {
for (auto &FI : FII.second) {
FunctionSummary *FS = FI->functionSummary();
assert(FS);

View File

@ -23,7 +23,7 @@ void FunctionInfoIndex::mergeFrom(std::unique_ptr<FunctionInfoIndex> Other,
StringRef ModPath;
for (auto &OtherFuncInfoLists : *Other) {
std::string FuncName = OtherFuncInfoLists.getKey();
uint64_t FuncGUID = OtherFuncInfoLists.first;
FunctionInfoList &List = OtherFuncInfoLists.second;
// Assert that the func info list only has one entry, since we shouldn't
@ -49,20 +49,9 @@ void FunctionInfoIndex::mergeFrom(std::unique_ptr<FunctionInfoIndex> Other,
// string reference owned by the combined index.
Info->functionSummary()->setModulePath(ModPath);
// If it is a local function, rename it.
if (GlobalValue::isLocalLinkage(
Info->functionSummary()->getFunctionLinkage())) {
// Any local functions are virtually renamed when being added to the
// combined index map, to disambiguate from other functions with
// the same name. The symbol table created for the combined index
// file should contain the renamed symbols.
FuncName =
FunctionInfoIndex::getGlobalNameForLocal(FuncName, NextModuleId);
}
// Add new function info to existing list. There may be duplicates when
// combining FunctionMap entries, due to COMDAT functions. Any local
// functions were virtually renamed above.
addFunctionInfo(FuncName, std::move(Info));
// functions were given unique global IDs.
addFunctionInfo(FuncGUID, std::move(Info));
}
}

View File

@ -47,7 +47,7 @@ template class llvm::SymbolTableListTraits<GlobalAlias>;
//
Module::Module(StringRef MID, LLVMContext &C)
: Context(C), Materializer(), ModuleID(MID), DL("") {
: Context(C), Materializer(), ModuleID(MID), SourceFileName(MID), DL("") {
ValSymTab = new ValueSymbolTable();
NamedMDSymTab = new StringMap<NamedMDNode *>();
Context.addModule(this);

View File

@ -126,7 +126,11 @@ static void findExternalCalls(const Module &DestModule, Function &F,
if (CalledFunction->hasInternalLinkage()) {
ImportedName = Renamed;
}
auto It = CalledFunctions.insert(ImportedName);
// Compute the global identifier used in the function index.
auto CalledFunctionGlobalID = Function::getGlobalIdentifier(
CalledFunction->getName(), CalledFunction->getLinkage(),
CalledFunction->getParent()->getSourceFileName());
auto It = CalledFunctions.insert(CalledFunctionGlobalID);
if (!It.second) {
// This is a call to a function we already considered, skip.
continue;
@ -213,14 +217,12 @@ static void GetImportList(Module &DestModule,
GlobalValue *SGV = SrcModule.getNamedValue(CalledFunctionName);
if (!SGV) {
// The destination module is referencing function using their renamed name
// when importing a function that was originally local in the source
// module. The source module we have might not have been renamed so we try
// to remove the suffix added during the renaming to recover the original
// The function is referenced by a global identifier, which has the
// source file name prepended for functions that were originally local
// in the source module. Strip any prepended name to recover the original
// name in the source module.
std::pair<StringRef, StringRef> Split =
CalledFunctionName.split(".llvm.");
SGV = SrcModule.getNamedValue(Split.first);
std::pair<StringRef, StringRef> Split = CalledFunctionName.split(":");
SGV = SrcModule.getNamedValue(Split.second);
assert(SGV && "Can't find function to import in source module");
}
if (!SGV) {

Binary file not shown.

View File

@ -0,0 +1,2 @@
; RUN: llvm-bcanalyzer -dump %p/Inputs/source-filename.bc | FileCheck %s
; CHECK: <SOURCE_FILENAME {{.*}} record string = 'source-filename.c'

View File

@ -24,8 +24,10 @@
; COMBINED-NEXT: <COMBINED_ENTRY
; COMBINED-NEXT: </FUNCTION_SUMMARY_BLOCK
; COMBINED-NEXT: <VALUE_SYMTAB
; COMBINED-NEXT: <COMBINED_FNENTRY {{.*}} record string = '{{f|g}}'
; COMBINED-NEXT: <COMBINED_FNENTRY {{.*}} record string = '{{f|g}}'
; Check that the format is: op0=offset, op1=funcguid, where funcguid is
; the lower 64 bits of the function name MD5.
; COMBINED-NEXT: <COMBINED_FNENTRY abbrevid={{[0-9]+}} op0={{[0-9]+}} op1={{-3706093650706652785|-5300342847281564238}}
; COMBINED-NEXT: <COMBINED_FNENTRY abbrevid={{[0-9]+}} op0={{[0-9]+}} op1={{-3706093650706652785|-5300342847281564238}}
; COMBINED-NEXT: </VALUE_SYMTAB
define void @f() {

View File

@ -14,8 +14,10 @@
; COMBINED-NEXT: <COMBINED_ENTRY
; COMBINED-NEXT: </FUNCTION_SUMMARY_BLOCK
; COMBINED-NEXT: <VALUE_SYMTAB
; COMBINED-NEXT: <COMBINED_FNENTRY {{.*}} record string = '{{f|g}}'
; COMBINED-NEXT: <COMBINED_FNENTRY {{.*}} record string = '{{f|g}}'
; Check that the format is: op0=offset, op1=funcguid, where funcguid is
; the lower 64 bits of the function name MD5.
; COMBINED-NEXT: <COMBINED_FNENTRY abbrevid={{[0-9]+}} op0={{[0-9]+}} op1={{-3706093650706652785|-5300342847281564238}}
; COMBINED-NEXT: <COMBINED_FNENTRY abbrevid={{[0-9]+}} op0={{[0-9]+}} op1={{-3706093650706652785|-5300342847281564238}}
; COMBINED-NEXT: </VALUE_SYMTAB
define void @f() {

View File

@ -173,6 +173,7 @@ static const char *GetCodeName(unsigned CodeID, unsigned BlockID,
STRINGIFY_CODE(MODULE_CODE, GCNAME)
STRINGIFY_CODE(MODULE_CODE, VSTOFFSET)
STRINGIFY_CODE(MODULE_CODE, METADATA_VALUES)
STRINGIFY_CODE(MODULE_CODE, SOURCE_FILENAME)
}
case bitc::IDENTIFICATION_BLOCK_ID:
switch (CodeID) {