llvm-project/llvm/lib/Linker/LinkModules.cpp

606 lines
20 KiB
C++
Raw Normal View History

2009-03-03 18:04:23 +08:00
//===- lib/Linker/LinkModules.cpp - Module Linker Implementation ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the LLVM module linker.
//
//===----------------------------------------------------------------------===//
#include "LinkDiagnosticInfo.h"
#include "llvm-c/Linker.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/IR/Comdat.h"
#include "llvm/IR/DiagnosticPrinter.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Linker/Linker.h"
#include "llvm/Support/Error.h"
using namespace llvm;
namespace {
/// This is an implementation class for the LinkModules function, which is the
/// entrypoint for this file.
class ModuleLinker {
IRMover &Mover;
std::unique_ptr<Module> SrcM;
SetVector<GlobalValue *> ValuesToLink;
/// For symbol clashes, prefer those from Src.
unsigned Flags;
/// List of global value names that should be internalized.
StringSet<> Internalize;
/// Function that will perform the actual internalization. The reason for a
/// callback is that the linker cannot call internalizeModule without
/// creating a circular dependency between IPO and the linker.
std::function<void(Module &, const StringSet<> &)> InternalizeCallback;
/// Used as the callback for lazy linking.
/// The mover has just hit GV and we have to decide if it, and other members
/// of the same comdat, should be linked. Every member to be linked is passed
/// to Add.
void addLazyFor(GlobalValue &GV, const IRMover::ValueAdder &Add);
bool shouldOverrideFromSrc() { return Flags & Linker::OverrideFromSrc; }
bool shouldLinkOnlyNeeded() { return Flags & Linker::LinkOnlyNeeded; }
bool shouldLinkFromSource(bool &LinkFromSrc, const GlobalValue &Dest,
const GlobalValue &Src);
/// Should we have mover and linker error diag info?
bool emitError(const Twine &Message) {
SrcM->getContext().diagnose(LinkDiagnosticInfo(DS_Error, Message));
return true;
}
bool getComdatLeader(Module &M, StringRef ComdatName,
const GlobalVariable *&GVar);
bool computeResultingSelectionKind(StringRef ComdatName,
Comdat::SelectionKind Src,
Comdat::SelectionKind Dst,
Comdat::SelectionKind &Result,
bool &LinkFromSrc);
std::map<const Comdat *, std::pair<Comdat::SelectionKind, bool>>
ComdatsChosen;
bool getComdatResult(const Comdat *SrcC, Comdat::SelectionKind &SK,
bool &LinkFromSrc);
// Keep track of the lazy linked global members of each comdat in source.
DenseMap<const Comdat *, std::vector<GlobalValue *>> LazyComdatMembers;
/// Given a global in the source module, return the global in the
/// destination module that is being linked to, if any.
GlobalValue *getLinkedToGlobal(const GlobalValue *SrcGV) {
Module &DstM = Mover.getModule();
// If the source has no name it can't link. If it has local linkage,
// there is no name match-up going on.
if (!SrcGV->hasName() || GlobalValue::isLocalLinkage(SrcGV->getLinkage()))
return nullptr;
// Otherwise see if we have a match in the destination module's symtab.
GlobalValue *DGV = DstM.getNamedValue(SrcGV->getName());
if (!DGV)
return nullptr;
// If we found a global with the same name in the dest module, but it has
// internal linkage, we are really not doing any linkage here.
if (DGV->hasLocalLinkage())
return nullptr;
// Otherwise, we do in fact link to the destination global.
return DGV;
}
/// Drop GV if it is a member of a comdat that we are dropping.
/// This can happen with COFF's largest selection kind.
void dropReplacedComdat(GlobalValue &GV,
const DenseSet<const Comdat *> &ReplacedDstComdats);
bool linkIfNeeded(GlobalValue &GV);
public:
ModuleLinker(IRMover &Mover, std::unique_ptr<Module> SrcM, unsigned Flags,
std::function<void(Module &, const StringSet<> &)>
InternalizeCallback = {})
: Mover(Mover), SrcM(std::move(SrcM)), Flags(Flags),
InternalizeCallback(std::move(InternalizeCallback)) {}
bool run();
};
}
2015-11-29 22:33:06 +08:00
static GlobalValue::VisibilityTypes
getMinVisibility(GlobalValue::VisibilityTypes A,
GlobalValue::VisibilityTypes B) {
if (A == GlobalValue::HiddenVisibility || B == GlobalValue::HiddenVisibility)
return GlobalValue::HiddenVisibility;
if (A == GlobalValue::ProtectedVisibility ||
B == GlobalValue::ProtectedVisibility)
return GlobalValue::ProtectedVisibility;
return GlobalValue::DefaultVisibility;
}
bool ModuleLinker::getComdatLeader(Module &M, StringRef ComdatName,
const GlobalVariable *&GVar) {
const GlobalValue *GVal = M.getNamedValue(ComdatName);
if (const auto *GA = dyn_cast_or_null<GlobalAlias>(GVal)) {
GVal = GA->getBaseObject();
if (!GVal)
// We cannot resolve the size of the aliasee yet.
return emitError("Linking COMDATs named '" + ComdatName +
"': COMDAT key involves incomputable alias size.");
}
GVar = dyn_cast_or_null<GlobalVariable>(GVal);
if (!GVar)
return emitError(
"Linking COMDATs named '" + ComdatName +
"': GlobalVariable required for data dependent selection!");
return false;
}
bool ModuleLinker::computeResultingSelectionKind(StringRef ComdatName,
Comdat::SelectionKind Src,
Comdat::SelectionKind Dst,
Comdat::SelectionKind &Result,
bool &LinkFromSrc) {
Module &DstM = Mover.getModule();
// The ability to mix Comdat::SelectionKind::Any with
// Comdat::SelectionKind::Largest is a behavior that comes from COFF.
bool DstAnyOrLargest = Dst == Comdat::SelectionKind::Any ||
Dst == Comdat::SelectionKind::Largest;
bool SrcAnyOrLargest = Src == Comdat::SelectionKind::Any ||
Src == Comdat::SelectionKind::Largest;
if (DstAnyOrLargest && SrcAnyOrLargest) {
if (Dst == Comdat::SelectionKind::Largest ||
Src == Comdat::SelectionKind::Largest)
Result = Comdat::SelectionKind::Largest;
else
Result = Comdat::SelectionKind::Any;
} else if (Src == Dst) {
Result = Dst;
} else {
return emitError("Linking COMDATs named '" + ComdatName +
"': invalid selection kinds!");
}
switch (Result) {
case Comdat::SelectionKind::Any:
// Go with Dst.
LinkFromSrc = false;
break;
case Comdat::SelectionKind::NoDuplicates:
return emitError("Linking COMDATs named '" + ComdatName +
"': noduplicates has been violated!");
case Comdat::SelectionKind::ExactMatch:
case Comdat::SelectionKind::Largest:
case Comdat::SelectionKind::SameSize: {
const GlobalVariable *DstGV;
const GlobalVariable *SrcGV;
if (getComdatLeader(DstM, ComdatName, DstGV) ||
getComdatLeader(*SrcM, ComdatName, SrcGV))
return true;
const DataLayout &DstDL = DstM.getDataLayout();
const DataLayout &SrcDL = SrcM->getDataLayout();
uint64_t DstSize = DstDL.getTypeAllocSize(DstGV->getValueType());
uint64_t SrcSize = SrcDL.getTypeAllocSize(SrcGV->getValueType());
if (Result == Comdat::SelectionKind::ExactMatch) {
if (SrcGV->getInitializer() != DstGV->getInitializer())
return emitError("Linking COMDATs named '" + ComdatName +
"': ExactMatch violated!");
LinkFromSrc = false;
} else if (Result == Comdat::SelectionKind::Largest) {
LinkFromSrc = SrcSize > DstSize;
} else if (Result == Comdat::SelectionKind::SameSize) {
if (SrcSize != DstSize)
return emitError("Linking COMDATs named '" + ComdatName +
"': SameSize violated!");
LinkFromSrc = false;
} else {
llvm_unreachable("unknown selection kind");
}
break;
}
}
return false;
}
bool ModuleLinker::getComdatResult(const Comdat *SrcC,
Comdat::SelectionKind &Result,
bool &LinkFromSrc) {
Module &DstM = Mover.getModule();
Comdat::SelectionKind SSK = SrcC->getSelectionKind();
StringRef ComdatName = SrcC->getName();
Module::ComdatSymTabType &ComdatSymTab = DstM.getComdatSymbolTable();
Module::ComdatSymTabType::iterator DstCI = ComdatSymTab.find(ComdatName);
if (DstCI == ComdatSymTab.end()) {
// Use the comdat if it is only available in one of the modules.
LinkFromSrc = true;
Result = SSK;
return false;
}
const Comdat *DstC = &DstCI->second;
Comdat::SelectionKind DSK = DstC->getSelectionKind();
return computeResultingSelectionKind(ComdatName, SSK, DSK, Result,
LinkFromSrc);
}
bool ModuleLinker::shouldLinkFromSource(bool &LinkFromSrc,
const GlobalValue &Dest,
const GlobalValue &Src) {
// Should we unconditionally use the Src?
if (shouldOverrideFromSrc()) {
LinkFromSrc = true;
return false;
}
// We always have to add Src if it has appending linkage.
if (Src.hasAppendingLinkage()) {
LinkFromSrc = true;
return false;
}
bool SrcIsDeclaration = Src.isDeclarationForLinker();
bool DestIsDeclaration = Dest.isDeclarationForLinker();
if (SrcIsDeclaration) {
// If Src is external or if both Src & Dest are external.. Just link the
// external globals, we aren't adding anything.
if (Src.hasDLLImportStorageClass()) {
// If one of GVs is marked as DLLImport, result should be dllimport'ed.
LinkFromSrc = DestIsDeclaration;
return false;
}
// If the Dest is weak, use the source linkage.
if (Dest.hasExternalWeakLinkage()) {
LinkFromSrc = true;
return false;
}
// Link an available_externally over a declaration.
LinkFromSrc = !Src.isDeclaration() && Dest.isDeclaration();
return false;
}
if (DestIsDeclaration) {
// If Dest is external but Src is not:
LinkFromSrc = true;
return false;
}
if (Src.hasCommonLinkage()) {
if (Dest.hasLinkOnceLinkage() || Dest.hasWeakLinkage()) {
LinkFromSrc = true;
return false;
}
if (!Dest.hasCommonLinkage()) {
LinkFromSrc = false;
return false;
}
const DataLayout &DL = Dest.getParent()->getDataLayout();
uint64_t DestSize = DL.getTypeAllocSize(Dest.getValueType());
uint64_t SrcSize = DL.getTypeAllocSize(Src.getValueType());
LinkFromSrc = SrcSize > DestSize;
return false;
}
if (Src.isWeakForLinker()) {
assert(!Dest.hasExternalWeakLinkage());
assert(!Dest.hasAvailableExternallyLinkage());
if (Dest.hasLinkOnceLinkage() && Src.hasWeakLinkage()) {
LinkFromSrc = true;
return false;
}
LinkFromSrc = false;
return false;
}
if (Dest.isWeakForLinker()) {
assert(Src.hasExternalLinkage());
LinkFromSrc = true;
return false;
}
assert(!Src.hasExternalWeakLinkage());
assert(!Dest.hasExternalWeakLinkage());
assert(Dest.hasExternalLinkage() && Src.hasExternalLinkage() &&
"Unexpected linkage type!");
return emitError("Linking globals named '" + Src.getName() +
"': symbol multiply defined!");
}
bool ModuleLinker::linkIfNeeded(GlobalValue &GV) {
GlobalValue *DGV = getLinkedToGlobal(&GV);
if (shouldLinkOnlyNeeded()) {
// Always import variables with appending linkage.
if (!GV.hasAppendingLinkage()) {
// Don't import globals unless they are referenced by the destination
// module.
if (!DGV)
return false;
// Don't import globals that are already defined in the destination module
if (!DGV->isDeclaration())
return false;
}
}
if (DGV && !GV.hasLocalLinkage() && !GV.hasAppendingLinkage()) {
auto *DGVar = dyn_cast<GlobalVariable>(DGV);
auto *SGVar = dyn_cast<GlobalVariable>(&GV);
if (DGVar && SGVar) {
if (DGVar->isDeclaration() && SGVar->isDeclaration() &&
(!DGVar->isConstant() || !SGVar->isConstant())) {
DGVar->setConstant(false);
SGVar->setConstant(false);
}
if (DGVar->hasCommonLinkage() && SGVar->hasCommonLinkage()) {
unsigned Align = std::max(DGVar->getAlignment(), SGVar->getAlignment());
SGVar->setAlignment(Align);
DGVar->setAlignment(Align);
}
}
GlobalValue::VisibilityTypes Visibility =
getMinVisibility(DGV->getVisibility(), GV.getVisibility());
DGV->setVisibility(Visibility);
GV.setVisibility(Visibility);
IR: Introduce local_unnamed_addr attribute. If a local_unnamed_addr attribute is attached to a global, the address is known to be insignificant within the module. It is distinct from the existing unnamed_addr attribute in that it only describes a local property of the module rather than a global property of the symbol. This attribute is intended to be used by the code generator and LTO to allow the linker to decide whether the global needs to be in the symbol table. It is possible to exclude a global from the symbol table if three things are true: - This attribute is present on every instance of the global (which means that the normal rule that the global must have a unique address can be broken without being observable by the program by performing comparisons against the global's address) - The global has linkonce_odr linkage (which means that each linkage unit must have its own copy of the global if it requires one, and the copy in each linkage unit must be the same) - It is a constant or a function (which means that the program cannot observe that the unique-address rule has been broken by writing to the global) Although this attribute could in principle be computed from the module contents, LTO clients (i.e. linkers) will normally need to be able to compute this property as part of symbol resolution, and it would be inefficient to materialize every module just to compute it. See: http://lists.llvm.org/pipermail/llvm-commits/Week-of-Mon-20160509/356401.html http://lists.llvm.org/pipermail/llvm-commits/Week-of-Mon-20160516/356738.html for earlier discussion. Part of the fix for PR27553. Differential Revision: http://reviews.llvm.org/D20348 llvm-svn: 272709
2016-06-15 05:01:22 +08:00
GlobalValue::UnnamedAddr UnnamedAddr = GlobalValue::getMinUnnamedAddr(
DGV->getUnnamedAddr(), GV.getUnnamedAddr());
DGV->setUnnamedAddr(UnnamedAddr);
GV.setUnnamedAddr(UnnamedAddr);
}
if (!DGV && !shouldOverrideFromSrc() &&
(GV.hasLocalLinkage() || GV.hasLinkOnceLinkage() ||
GV.hasAvailableExternallyLinkage()))
return false;
if (GV.isDeclaration())
return false;
if (const Comdat *SC = GV.getComdat()) {
bool LinkFromSrc;
Comdat::SelectionKind SK;
std::tie(SK, LinkFromSrc) = ComdatsChosen[SC];
if (!LinkFromSrc)
return false;
}
bool LinkFromSrc = true;
if (DGV && shouldLinkFromSource(LinkFromSrc, *DGV, GV))
return true;
if (LinkFromSrc)
ValuesToLink.insert(&GV);
return false;
}
void ModuleLinker::addLazyFor(GlobalValue &GV, const IRMover::ValueAdder &Add) {
// Add these to the internalize list
if (!GV.hasLinkOnceLinkage() && !GV.hasAvailableExternallyLinkage() &&
!shouldLinkOnlyNeeded())
return;
if (InternalizeCallback)
Internalize.insert(GV.getName());
Add(GV);
const Comdat *SC = GV.getComdat();
if (!SC)
return;
for (GlobalValue *GV2 : LazyComdatMembers[SC]) {
GlobalValue *DGV = getLinkedToGlobal(GV2);
bool LinkFromSrc = true;
if (DGV && shouldLinkFromSource(LinkFromSrc, *DGV, *GV2))
return;
if (!LinkFromSrc)
continue;
if (InternalizeCallback)
Internalize.insert(GV2->getName());
Add(*GV2);
}
}
void ModuleLinker::dropReplacedComdat(
GlobalValue &GV, const DenseSet<const Comdat *> &ReplacedDstComdats) {
Comdat *C = GV.getComdat();
if (!C)
return;
if (!ReplacedDstComdats.count(C))
return;
if (GV.use_empty()) {
GV.eraseFromParent();
return;
}
if (auto *F = dyn_cast<Function>(&GV)) {
F->deleteBody();
} else if (auto *Var = dyn_cast<GlobalVariable>(&GV)) {
Var->setInitializer(nullptr);
} else {
auto &Alias = cast<GlobalAlias>(GV);
Module &M = *Alias.getParent();
PointerType &Ty = *cast<PointerType>(Alias.getType());
GlobalValue *Declaration;
if (auto *FTy = dyn_cast<FunctionType>(Alias.getValueType())) {
Declaration = Function::Create(FTy, GlobalValue::ExternalLinkage, "", &M);
} else {
Declaration =
new GlobalVariable(M, Ty.getElementType(), /*isConstant*/ false,
GlobalValue::ExternalLinkage,
/*Initializer*/ nullptr);
}
Declaration->takeName(&Alias);
Alias.replaceAllUsesWith(Declaration);
Alias.eraseFromParent();
}
}
bool ModuleLinker::run() {
Module &DstM = Mover.getModule();
DenseSet<const Comdat *> ReplacedDstComdats;
for (const auto &SMEC : SrcM->getComdatSymbolTable()) {
const Comdat &C = SMEC.getValue();
if (ComdatsChosen.count(&C))
continue;
Comdat::SelectionKind SK;
bool LinkFromSrc;
if (getComdatResult(&C, SK, LinkFromSrc))
return true;
ComdatsChosen[&C] = std::make_pair(SK, LinkFromSrc);
if (!LinkFromSrc)
continue;
Module::ComdatSymTabType &ComdatSymTab = DstM.getComdatSymbolTable();
Module::ComdatSymTabType::iterator DstCI = ComdatSymTab.find(C.getName());
if (DstCI == ComdatSymTab.end())
continue;
// The source comdat is replacing the dest one.
const Comdat *DstC = &DstCI->second;
ReplacedDstComdats.insert(DstC);
}
// Alias have to go first, since we are not able to find their comdats
// otherwise.
for (auto I = DstM.alias_begin(), E = DstM.alias_end(); I != E;) {
GlobalAlias &GV = *I++;
dropReplacedComdat(GV, ReplacedDstComdats);
}
for (auto I = DstM.global_begin(), E = DstM.global_end(); I != E;) {
GlobalVariable &GV = *I++;
dropReplacedComdat(GV, ReplacedDstComdats);
}
for (auto I = DstM.begin(), E = DstM.end(); I != E;) {
Function &GV = *I++;
dropReplacedComdat(GV, ReplacedDstComdats);
}
for (GlobalVariable &GV : SrcM->globals())
if (GV.hasLinkOnceLinkage())
if (const Comdat *SC = GV.getComdat())
LazyComdatMembers[SC].push_back(&GV);
for (Function &SF : *SrcM)
if (SF.hasLinkOnceLinkage())
if (const Comdat *SC = SF.getComdat())
LazyComdatMembers[SC].push_back(&SF);
for (GlobalAlias &GA : SrcM->aliases())
if (GA.hasLinkOnceLinkage())
if (const Comdat *SC = GA.getComdat())
LazyComdatMembers[SC].push_back(&GA);
// Insert all of the globals in src into the DstM module... without linking
// initializers (which could refer to functions not yet mapped over).
for (GlobalVariable &GV : SrcM->globals())
if (linkIfNeeded(GV))
return true;
for (Function &SF : *SrcM)
if (linkIfNeeded(SF))
return true;
for (GlobalAlias &GA : SrcM->aliases())
if (linkIfNeeded(GA))
return true;
for (unsigned I = 0; I < ValuesToLink.size(); ++I) {
GlobalValue *GV = ValuesToLink[I];
const Comdat *SC = GV->getComdat();
if (!SC)
continue;
for (GlobalValue *GV2 : LazyComdatMembers[SC]) {
GlobalValue *DGV = getLinkedToGlobal(GV2);
bool LinkFromSrc = true;
if (DGV && shouldLinkFromSource(LinkFromSrc, *DGV, *GV2))
return true;
if (LinkFromSrc)
ValuesToLink.insert(GV2);
}
}
Restore "Move metadata linking after lazy global materialization/linking." Summary: This reverts commit r251965. Restore "Move metadata linking after lazy global materialization/linking." This restores commit r251926, with fixes for the LTO bootstrapping bot failure. The bot failure was caused by references from debug metadata to otherwise unreferenced globals. Previously, this caused the lazy linking to link in their defs, which is unnecessary. With this patch, because lazy linking is complete when we encounter the metadata reference, the materializer created a declaration. For definitions such as aliases and comdats, it is illegal to have a declaration. Furthermore, metadata linking should not change code generation. Therefore, when linking of global value bodies is complete, the materializer will simply return nullptr as the new reference for the linked metadata. This change required fixing a different test to ensure there was a real reference to a linkonce global that was only being reference from metadata. Note that the new changes to the only-needed-named-metadata.ll test illustrate an issue with llvm-link -only-needed handling of comdat groups, whereby it may result in an incomplete comdat group. I note this in the test comments, but the issue is orthogonal to this patch (it can be reproduced without any metadata at head). Reviewers: dexonsmith, rafael, tra Subscribers: tobiasvk, joker.eph, llvm-commits Differential Revision: http://reviews.llvm.org/D14447 llvm-svn: 252320
2015-11-07 01:50:53 +08:00
if (InternalizeCallback) {
for (GlobalValue *GV : ValuesToLink)
Internalize.insert(GV->getName());
}
// FIXME: Propagate Errors through to the caller instead of emitting
// diagnostics.
bool HasErrors = false;
if (Error E = Mover.move(std::move(SrcM), ValuesToLink.getArrayRef(),
[this](GlobalValue &GV, IRMover::ValueAdder Add) {
addLazyFor(GV, Add);
},
/* IsPerformingImport */ false)) {
handleAllErrors(std::move(E), [&](ErrorInfoBase &EIB) {
DstM.getContext().diagnose(LinkDiagnosticInfo(DS_Error, EIB.message()));
HasErrors = true;
});
}
if (HasErrors)
return true;
if (InternalizeCallback)
InternalizeCallback(DstM, Internalize);
return false;
}
Linker::Linker(Module &M) : Mover(M) {}
bool Linker::linkInModule(
std::unique_ptr<Module> Src, unsigned Flags,
std::function<void(Module &, const StringSet<> &)> InternalizeCallback) {
ModuleLinker ModLinker(Mover, std::move(Src), Flags,
std::move(InternalizeCallback));
return ModLinker.run();
}
//===----------------------------------------------------------------------===//
// LinkModules entrypoint.
//===----------------------------------------------------------------------===//
/// This function links two modules together, with the resulting Dest module
/// modified to be the composite of the two input modules. If an error occurs,
/// true is returned and ErrorMsg (if not null) is set to indicate the problem.
/// Upon failure, the Dest module could be in a modified state, and shouldn't be
/// relied on to be consistent.
bool Linker::linkModules(
Module &Dest, std::unique_ptr<Module> Src, unsigned Flags,
std::function<void(Module &, const StringSet<> &)> InternalizeCallback) {
Linker L(Dest);
return L.linkInModule(std::move(Src), Flags, std::move(InternalizeCallback));
}
//===----------------------------------------------------------------------===//
// C API.
//===----------------------------------------------------------------------===//
LLVMBool LLVMLinkModules2(LLVMModuleRef Dest, LLVMModuleRef Src) {
Module *D = unwrap(Dest);
std::unique_ptr<Module> M(unwrap(Src));
return Linker::linkModules(*D, std::move(M));
}