TransformUtils: Introduce module splitter.

The module splitter splits a module into linkable partitions. It will
be used to implement parallel LTO code generation.

This initial version of the splitter does not attempt to deal with the
somewhat subtle symbol visibility issues around module splitting. These
will be dealt with in a future change.

Differential Revision: http://reviews.llvm.org/D12132

llvm-svn: 245662
This commit is contained in:
Peter Collingbourne 2015-08-21 02:48:20 +00:00
parent cf61aae163
commit 1dc6a8d179
19 changed files with 413 additions and 1 deletions

View File

@ -23,6 +23,7 @@
#include "llvm/IR/ValueHandle.h"
#include "llvm/IR/ValueMap.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
#include <functional>
namespace llvm {
@ -52,6 +53,14 @@ class DominatorTree;
Module *CloneModule(const Module *M);
Module *CloneModule(const Module *M, ValueToValueMapTy &VMap);
/// Return a copy of the specified module. The ShouldCloneDefinition function
/// controls whether a specific GlobalValue's definition is cloned. If the
/// function returns false, the module copy will contain an external reference
/// in place of the global definition.
///
/// An alias cannot act as an external reference, so an alias whose definition
/// is not cloned is represented in the copy by an external function or global
/// variable declaration, chosen by the alias's value type. Attributes are not
/// copied to that replacement declaration, and VMap maps the original alias to
/// it.
Module *
CloneModule(const Module *M, ValueToValueMapTy &VMap,
std::function<bool(const GlobalValue *)> ShouldCloneDefinition);
/// ClonedCodeInfo - This struct can be used to capture information about code
/// being cloned, while it is being cloned.
struct ClonedCodeInfo {

View File

@ -0,0 +1,43 @@
//===- SplitModule.h - Split a module into partitions -----------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the function llvm::SplitModule, which splits a module
// into multiple linkable partitions. It can be used to implement parallel code
// generation for link-time optimization.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_TRANSFORMS_UTILS_SPLITMODULE_H
#define LLVM_TRANSFORMS_UTILS_SPLITMODULE_H
#include <functional>
#include <memory>
namespace llvm {
class Module;
class StringRef;
/// Splits the module M into N linkable partitions. The function ModuleCallback
/// is called N times passing each individual partition as the MPart argument.
///
/// The callback is invoked once per partition, in order from partition 0 to
/// partition N-1, and receives ownership of that partition. If N is zero the
/// callback is never invoked. Before partitioning, symbols with local linkage
/// are given external linkage and hidden visibility, and unnamed globals are
/// given names, so that the partitions name every global consistently.
/// Module-level inline asm is retained only in the first partition.
///
/// FIXME: This function does not deal with the somewhat subtle symbol
/// visibility issues around module splitting, including (but not limited to):
///
/// - Internal symbols should not collide with symbols defined outside the
/// module.
/// - Internal symbols defined in module-level inline asm should be visible to
/// each partition.
void SplitModule(
std::unique_ptr<Module> M, unsigned N,
std::function<void(std::unique_ptr<Module> MPart)> ModuleCallback);
} // End llvm namespace
#endif

View File

@ -34,6 +34,7 @@ add_llvm_library(LLVMTransformUtils
SimplifyIndVar.cpp
SimplifyInstructions.cpp
SimplifyLibCalls.cpp
SplitModule.cpp
SymbolRewriter.cpp
UnifyFunctionExitNodes.cpp
Utils.cpp

View File

@ -33,6 +33,12 @@ Module *llvm::CloneModule(const Module *M) {
}
Module *llvm::CloneModule(const Module *M, ValueToValueMapTy &VMap) {
  // Delegate to the filtering overload with a predicate that accepts every
  // global, so that all definitions are cloned.
  auto CloneEverything = [](const GlobalValue *) { return true; };
  return CloneModule(M, VMap, CloneEverything);
}
Module *llvm::CloneModule(
const Module *M, ValueToValueMapTy &VMap,
std::function<bool(const GlobalValue *)> ShouldCloneDefinition) {
// First off, we need to create the new module.
Module *New = new Module(M->getModuleIdentifier(), M->getContext());
New->setDataLayout(M->getDataLayout());
@ -68,6 +74,26 @@ Module *llvm::CloneModule(const Module *M, ValueToValueMapTy &VMap) {
// Loop over the aliases in the module
for (Module::const_alias_iterator I = M->alias_begin(), E = M->alias_end();
I != E; ++I) {
if (!ShouldCloneDefinition(I)) {
// An alias cannot act as an external reference, so we need to create
// either a function or a global variable depending on the value type.
// FIXME: Once pointee types are gone we can probably pick one or the
// other.
GlobalValue *GV;
if (I->getValueType()->isFunctionTy())
GV = Function::Create(cast<FunctionType>(I->getValueType()),
GlobalValue::ExternalLinkage, I->getName(), New);
else
GV = new GlobalVariable(
*New, I->getValueType(), false, GlobalValue::ExternalLinkage,
(Constant *)nullptr, I->getName(), (GlobalVariable *)nullptr,
I->getThreadLocalMode(), I->getType()->getAddressSpace());
VMap[I] = GV;
// We do not copy attributes (mainly because copying between different
// kinds of globals is forbidden), but this is generally not required for
// correctness.
continue;
}
auto *PTy = cast<PointerType>(I->getType());
auto *GA = GlobalAlias::create(PTy, I->getLinkage(), I->getName(), New);
GA->copyAttributesFrom(I);
@ -81,6 +107,11 @@ Module *llvm::CloneModule(const Module *M, ValueToValueMapTy &VMap) {
for (Module::const_global_iterator I = M->global_begin(), E = M->global_end();
I != E; ++I) {
GlobalVariable *GV = cast<GlobalVariable>(VMap[I]);
if (!ShouldCloneDefinition(I)) {
// Skip after setting the correct linkage for an external reference.
GV->setLinkage(GlobalValue::ExternalLinkage);
continue;
}
if (I->hasInitializer())
GV->setInitializer(MapValue(I->getInitializer(), VMap));
}
@ -89,6 +120,11 @@ Module *llvm::CloneModule(const Module *M, ValueToValueMapTy &VMap) {
//
for (Module::const_iterator I = M->begin(), E = M->end(); I != E; ++I) {
Function *F = cast<Function>(VMap[I]);
if (!ShouldCloneDefinition(I)) {
// Skip after setting the correct linkage for an external reference.
F->setLinkage(GlobalValue::ExternalLinkage);
continue;
}
if (!I->isDeclaration()) {
Function::arg_iterator DestI = F->arg_begin();
for (Function::const_arg_iterator J = I->arg_begin(); J != I->arg_end();
@ -109,6 +145,9 @@ Module *llvm::CloneModule(const Module *M, ValueToValueMapTy &VMap) {
// And aliases
for (Module::const_alias_iterator I = M->alias_begin(), E = M->alias_end();
I != E; ++I) {
// We already dealt with undefined aliases above.
if (!ShouldCloneDefinition(I))
continue;
GlobalAlias *GA = cast<GlobalAlias>(VMap[I]);
if (const Constant *C = I->getAliasee())
GA->setAliasee(MapValue(C, VMap));

View File

@ -0,0 +1,85 @@
//===- SplitModule.cpp - Split a module into partitions -------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the function llvm::SplitModule, which splits a module
// into multiple linkable partitions. It can be used to implement parallel code
// generation for link-time optimization.
//
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/SplitModule.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalObject.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/MD5.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/Cloning.h"
using namespace llvm;
/// Prepare GV for being referenced from a different partition: local symbols
/// become external-but-hidden, and anonymous symbols receive a name.
static void externalize(GlobalValue *GV) {
  const bool WasLocal = GV->hasLocalLinkage();
  if (WasLocal) {
    GV->setLinkage(GlobalValue::ExternalLinkage);
    GV->setVisibility(GlobalValue::HiddenVisibility);
  }

  // Unnamed entities must be named consistently between modules. setName will
  // give a distinct name to each such entity.
  const bool IsAnonymous = !GV->hasName();
  if (IsAnonymous)
    GV->setName("__llvmsplit_unnamed");
}
// Returns whether GV should be in partition (0-based) I of N.
static bool isInPartition(const GlobalValue *GV, unsigned I, unsigned N) {
if (auto GA = dyn_cast<GlobalAlias>(GV))
if (const GlobalObject *Base = GA->getBaseObject())
GV = Base;
StringRef Name;
if (const Comdat *C = GV->getComdat())
Name = C->getName();
else
Name = GV->getName();
// Partition by MD5 hash. We only need a few bits for evenness as the number
// of partitions will generally be in the 1-2 figure range; the low 16 bits
// are enough.
MD5 H;
MD5::MD5Result R;
H.update(Name);
H.final(R);
return (R[0] | (R[1] << 8)) % N == I;
}
void llvm::SplitModule(
std::unique_ptr<Module> M, unsigned N,
std::function<void(std::unique_ptr<Module> MPart)> ModuleCallback) {
for (Function &F : *M)
externalize(&F);
for (GlobalVariable &GV : M->globals())
externalize(&GV);
for (GlobalAlias &GA : M->aliases())
externalize(&GA);
// FIXME: We should be able to reuse M as the last partition instead of
// cloning it.
for (unsigned I = 0; I != N; ++I) {
ValueToValueMapTy VMap;
std::unique_ptr<Module> MPart(
CloneModule(M.get(), VMap, [=](const GlobalValue *GV) {
return isInPartition(GV, I, N);
}));
if (I != 0)
MPart->setModuleInlineAsm("");
ModuleCallback(std::move(MPart));
}
}

View File

@ -46,6 +46,7 @@ set(LLVM_TEST_DEPENDS
llvm-readobj
llvm-rtdyld
llvm-size
llvm-split
llvm-symbolizer
llvm-tblgen
macho-dump

View File

@ -248,6 +248,7 @@ for pattern in [r"\bbugpoint\b(?!-)",
r"\bllvm-readobj\b",
r"\bllvm-rtdyld\b",
r"\bllvm-size\b",
r"\bllvm-split\b",
r"\bllvm-tblgen\b",
r"\bllvm-c-test\b",
r"\bmacho-dump\b",

View File

@ -0,0 +1,19 @@
; Splits a module containing aliases into two partitions. Each alias must stay
; an alias in one partition and be replaced in the other by an external
; declaration of the matching kind: a global variable for @afoo and a function
; for @abar.
; RUN: llvm-split -o %t %s
; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 %s
; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 %s
; CHECK0-DAG: @afoo = alias [2 x i8*]* @foo
; CHECK1-DAG: @afoo = external global [2 x i8*]
@afoo = alias [2 x i8*]* @foo
; CHECK0-DAG: declare void @abar()
; CHECK1-DAG: @abar = alias void ()* @bar
@abar = alias void ()* @bar
@foo = global [2 x i8*] [i8* bitcast (void ()* @bar to i8*), i8* bitcast (void ()* @abar to i8*)]
define void @bar() {
store [2 x i8*] zeroinitializer, [2 x i8*]* @foo
store [2 x i8*] zeroinitializer, [2 x i8*]* @afoo
ret void
}
}

View File

@ -0,0 +1,19 @@
; Splits a module whose two functions both belong to comdat $foo. Both
; functions must be defined in the same partition (the first output) and only
; declared in the other one.
; RUN: llvm-split -o %t %s
; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 %s
; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 %s
$foo = comdat any
; CHECK0: define void @foo()
; CHECK1: declare void @foo()
define void @foo() comdat {
call void @bar()
ret void
}
; CHECK0: define void @bar()
; CHECK1: declare void @bar()
define void @bar() comdat($foo) {
call void @foo()
ret void
}
}

View File

@ -0,0 +1,17 @@
; Splits a module with two mutually calling functions. Each output must define
; one of the functions and carry only a declaration of the other.
; RUN: llvm-split -o %t %s
; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 %s
; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 %s
; CHECK0: define void @foo()
; CHECK1: declare void @foo()
define void @foo() {
call void @bar()
ret void
}
; CHECK0: declare void @bar()
; CHECK1: define void @bar()
define void @bar() {
call void @foo()
ret void
}
}

View File

@ -0,0 +1,11 @@
; Splits a module with two global variables whose initializers refer to each
; other. Each output must keep the initializer of one variable and turn the
; other variable into an external declaration.
; RUN: llvm-split -o %t %s
; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 %s
; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 %s
; CHECK0: @foo = global i8* bitcast
; CHECK1: @foo = external global i8*
@foo = global i8* bitcast (i8** @bar to i8*)
; CHECK0: @bar = external global i8*
; CHECK1: @bar = global i8* bitcast
@bar = global i8* bitcast (i8** @foo to i8*)

View File

@ -0,0 +1,17 @@
; Splits a module in which an internal function is called from a function that
; lands in the other partition. The internal function must lose its internal
; linkage and appear with hidden visibility in both outputs.
; RUN: llvm-split -o %t %s
; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 %s
; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 %s
; CHECK0: define hidden void @foo()
; CHECK1: declare hidden void @foo()
define internal void @foo() {
call void @bar()
ret void
}
; CHECK0: declare void @bar()
; CHECK1: define void @bar()
define void @bar() {
call void @foo()
ret void
}
}

View File

@ -0,0 +1,31 @@
; Splits a module containing unnamed internal functions. Each unnamed function
; must receive a distinct generated name (__llvmsplit_unnamed,
; __llvmsplit_unnamed1) with hidden visibility, and calls to it from the other
; partition must use that generated name.
; RUN: llvm-split -o %t %s
; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 %s
; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 %s
; CHECK0: declare hidden void @__llvmsplit_unnamed()
; CHECK1: define hidden void @__llvmsplit_unnamed()
define internal void @0() {
; CHECK1: call void @foo()
call void @foo()
ret void
}
; CHECK0: declare hidden void @__llvmsplit_unnamed1()
; CHECK1: define hidden void @__llvmsplit_unnamed1()
define internal void @1() {
; CHECK1: call void @foo()
; CHECK1: call void @foo()
call void @foo()
call void @foo()
ret void
}
; CHECK0: define void @foo()
; CHECK1: declare void @foo()
define void @foo() {
; CHECK0: call void @__llvmsplit_unnamed1()
; CHECK0: call void @__llvmsplit_unnamed()
call void @1()
call void @0()
ret void
}
}

View File

@ -40,6 +40,7 @@ subdirectories =
llvm-profdata
llvm-rtdyld
llvm-size
llvm-split
macho-dump
opt
verify-uselistorder

View File

@ -32,7 +32,8 @@ PARALLEL_DIRS := opt llvm-as llvm-dis llc llvm-ar llvm-nm llvm-link \
macho-dump llvm-objdump llvm-readobj llvm-rtdyld \
llvm-dwarfdump llvm-cov llvm-size llvm-stress llvm-mcmarkup \
llvm-profdata llvm-symbolizer obj2yaml yaml2obj llvm-c-test \
llvm-cxxdump verify-uselistorder dsymutil llvm-pdbdump
llvm-cxxdump verify-uselistorder dsymutil llvm-pdbdump \
llvm-split
# If Intel JIT Events support is configured, build an extra tool to test it.
ifeq ($(USE_INTEL_JITEVENTS), 1)

View File

@ -0,0 +1,11 @@
set(LLVM_LINK_COMPONENTS
TransformUtils
BitWriter
Core
IRReader
Support
)
add_llvm_tool(llvm-split
llvm-split.cpp
)

View File

@ -0,0 +1,22 @@
;===- ./tools/llvm-split/LLVMBuild.txt -------------------------*- Conf -*--===;
;
; The LLVM Compiler Infrastructure
;
; This file is distributed under the University of Illinois Open Source
; License. See LICENSE.TXT for details.
;
;===------------------------------------------------------------------------===;
;
; This is an LLVMBuild description file for the components in this subdirectory.
;
; For more information on the LLVMBuild system, please see:
;
; http://llvm.org/docs/LLVMBuild.html
;
;===------------------------------------------------------------------------===;
[component_0]
type = Tool
name = llvm-split
parent = Tools
required_libraries = TransformUtils BitWriter Core IRReader Support

View File

@ -0,0 +1,17 @@
##===- tools/llvm-split/Makefile ---------------------------*- Makefile -*-===##
#
# The LLVM Compiler Infrastructure
#
# This file is distributed under the University of Illinois Open Source
# License. See LICENSE.TXT for details.
#
##===----------------------------------------------------------------------===##
LEVEL := ../..
TOOLNAME := llvm-split
LINK_COMPONENTS := transformutils bitwriter core irreader support
# This tool has no plugins, optimize startup time.
TOOL_NO_EXPORTS := 1
include $(LEVEL)/Makefile.common

View File

@ -0,0 +1,67 @@
//===-- llvm-split: command line tool for testing module splitter ---------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This program can be used to test the llvm::SplitModule function.
//
//===----------------------------------------------------------------------===//
#include "llvm/ADT/StringExtras.h"
#include "llvm/Bitcode/ReaderWriter.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IRReader/IRReader.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/ToolOutputFile.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/SplitModule.h"
using namespace llvm;
// Path of the input module (positional argument); the option's initial value
// is "-".
static cl::opt<std::string>
InputFilename(cl::Positional, cl::desc("<input bitcode file>"),
cl::init("-"), cl::value_desc("filename"));
// Value of -o. main() uses this as a prefix rather than a complete file name:
// the K-th partition (counting from 0) is written to the file named
// <value><K>.
static cl::opt<std::string>
OutputFilename("o", cl::desc("Override output filename"),
cl::value_desc("filename"));
// Value of -j: the number of partitions requested from SplitModule (initial
// value 2).
static cl::opt<unsigned> NumOutputs("j", cl::Prefix, cl::init(2),
cl::desc("Number of output files"));
int main(int argc, char **argv) {
LLVMContext &Context = getGlobalContext();
SMDiagnostic Err;
cl::ParseCommandLineOptions(argc, argv, "LLVM module splitter\n");
std::unique_ptr<Module> M = parseIRFile(InputFilename, Err, Context);
if (!M) {
Err.print(argv[0], errs());
return 1;
}
unsigned I = 0;
SplitModule(std::move(M), NumOutputs, [&](std::unique_ptr<Module> MPart) {
std::error_code EC;
std::unique_ptr<tool_output_file> Out(new tool_output_file(
OutputFilename + utostr(I++), EC, sys::fs::F_None));
if (EC) {
errs() << EC.message() << '\n';
exit(1);
}
WriteBitcodeToFile(MPart.get(), Out->os());
// Declare success.
Out->keep();
});
return 0;
}