[ThinLTO] Add an API to trigger file-based API for returning objects to the linker

Summary:
The motivation is to better support the -object_path_lto option on
Darwin. The linker needs to write the generated object files to
disk for later use by lldb or dsymutil (debug info is not present
in the final binary). We're moving this into libLTO so that we can
be smarter when a cache is enabled and hard-link when possible
instead of duplicating the files.

Reviewers: tejohnson, deadalnix, pcc

Subscribers: dexonsmith, llvm-commits

Differential Revision: https://reviews.llvm.org/D27507

llvm-svn: 289631
This commit is contained in:
Mehdi Amini 2016-12-14 04:56:42 +00:00
parent 378b8c8f01
commit 8e13bc4562
10 changed files with 239 additions and 24 deletions

View File

@ -44,7 +44,7 @@ typedef bool lto_bool_t;
* @{
*/
#define LTO_API_VERSION 20
#define LTO_API_VERSION 21
/**
* \since prior to LTO_API_VERSION=3
@ -636,6 +636,29 @@ extern unsigned int thinlto_module_get_num_objects(thinlto_code_gen_t cg);
extern LTOObjectBuffer thinlto_module_get_object(thinlto_code_gen_t cg,
unsigned int index);
/**
* Returns the number of object files produced by the ThinLTO CodeGenerator.
*
* It usually matches the number of input files, but this is not a guarantee of
* the API and may change in future implementation, so the client should not
* assume it.
*
* \since LTO_API_VERSION=21
*/
unsigned int thinlto_module_get_num_object_files(thinlto_code_gen_t cg);
/**
* Returns the path to the ith object file produced by the ThinLTO
* CodeGenerator.
*
* Client should use \p thinlto_module_get_num_object_files() to get the number
* of available objects.
*
* \since LTO_API_VERSION=21
*/
const char *thinlto_module_get_object_file(thinlto_code_gen_t cg,
unsigned int index);
/**
* Sets which PIC code model to generate.
* Returns true on error (check lto_get_error_message() for details).
@ -724,6 +747,17 @@ extern void thinlto_codegen_set_cache_entry_expiration(thinlto_code_gen_t cg,
extern void thinlto_codegen_set_savetemps_dir(thinlto_code_gen_t cg,
const char *save_temps_dir);
/**
* Set the path to a directory where to save generated object files. This
* path can be used by a linker to request on-disk files instead of in-memory
* buffers. When set, results are available through
* thinlto_module_get_object_file() instead of thinlto_module_get_object().
*
* \since LTO_API_VERSION=21
*/
void thinlto_set_generated_objects_dir(thinlto_code_gen_t cg,
const char *save_temps_dir);
/**
* Sets the cpu to generate code for.
*

View File

@ -72,17 +72,31 @@ public:
/**
* Process all the modules that were added to the code generator in parallel.
*
* Client can access the resulting object files using getProducedBinaries()
* Client can access the resulting object files using getProducedBinaries(),
* unless setGeneratedObjectsDirectory() has been called, in which case
* results are available through getProducedBinaryFiles().
*/
void run();
/**
* Return the "in memory" binaries produced by the code generator.
* Return the "in memory" binaries produced by the code generator. This is
* filled after run() unless setGeneratedObjectsDirectory() has been
* called, in which case results are available through
* getProducedBinaryFiles().
*/
std::vector<std::unique_ptr<MemoryBuffer>> &getProducedBinaries() {
return ProducedBinaries;
}
/**
* Return the paths of the "on-disk" binaries produced by the code generator.
* This is filled after run() only when setGeneratedObjectsDirectory() has been
* called; otherwise results are available through getProducedBinaries().
*/
std::vector<std::string> &getProducedBinaryFiles() {
return ProducedBinaryFiles;
}
/**
* \defgroup Options setters
* @{
@ -156,6 +170,14 @@ public:
/// the processing.
void setSaveTempsDir(std::string Path) { SaveTempsDir = std::move(Path); }
/// Set the path to a directory where to save generated object files. This
/// path can be used by a linker to request on-disk files instead of in-memory
/// buffers. When set, results are available through getProducedBinaryFiles()
/// instead of getProducedBinaries().
/// run() creates the directory if needed and reports a fatal error if the
/// path is not a directory afterwards. Leaving the path empty (the default)
/// keeps the results in memory.
void setGeneratedObjectsDirectory(std::string Path) {
SavedObjectsDirectoryPath = std::move(Path);
}
/// CPU to use to initialize the TargetMachine
void setCpu(std::string Cpu) { TMBuilder.MCpu = std::move(Cpu); }
@ -244,9 +266,13 @@ private:
/// Helper factory to build a TargetMachine
TargetMachineBuilder TMBuilder;
/// Vector holding the in-memory buffer containing the produced binaries.
/// Vector holding the in-memory buffer containing the produced binaries, when
/// SavedObjectsDirectoryPath isn't set.
std::vector<std::unique_ptr<MemoryBuffer>> ProducedBinaries;
/// Path to generated files in the supplied SavedObjectsDirectoryPath if any.
std::vector<std::string> ProducedBinaryFiles;
/// Vector holding the input buffers containing the bitcode modules to
/// process.
std::vector<MemoryBufferRef> Modules;
@ -264,6 +290,9 @@ private:
/// Path to a directory to save the temporary bitcode files.
std::string SaveTempsDir;
/// Path to a directory to save the generated object files.
std::string SavedObjectsDirectoryPath;
/// Flag to enable/disable CodeGen. When set to true, the process stops after
/// optimizations and a bitcode is produced.
bool DisableCodeGen = false;

View File

@ -342,6 +342,14 @@ std::error_code create_directory(const Twine &path, bool IgnoreExisting = true,
/// specific error_code.
std::error_code create_link(const Twine &to, const Twine &from);
/// Create a hard link from \a from to \a to, or return an error.
///
/// @param to The path to hard link to.
/// @param from The path to hard link from. This is created.
/// @returns errc::success if the link was created, otherwise a platform
/// specific error_code.
std::error_code create_hard_link(const Twine &to, const Twine &from);
/// @brief Get the current path.
///
/// @param result Holds the current path on return.

View File

@ -343,10 +343,9 @@ public:
}
// Cache the Produced object file
std::unique_ptr<MemoryBuffer>
write(std::unique_ptr<MemoryBuffer> OutputBuffer) {
void write(const MemoryBuffer &OutputBuffer) {
if (EntryPath.empty())
return OutputBuffer;
return;
// Write to a temporary to avoid race condition
SmallString<128> TempFilename;
@ -359,7 +358,7 @@ public:
}
{
raw_fd_ostream OS(TempFD, /* ShouldClose */ true);
OS << OutputBuffer->getBuffer();
OS << OutputBuffer.getBuffer();
}
// Rename to final destination (hopefully race condition won't matter here)
EC = sys::fs::rename(TempFilename, EntryPath);
@ -369,16 +368,8 @@ public:
if (EC)
report_fatal_error(Twine("Failed to open ") + EntryPath +
" to save cached entry\n");
OS << OutputBuffer->getBuffer();
OS << OutputBuffer.getBuffer();
}
auto ReloadedBufferOrErr = MemoryBuffer::getFile(EntryPath);
if (auto EC = ReloadedBufferOrErr.getError()) {
// FIXME diagnose
errs() << "error: can't reload cached file '" << EntryPath
<< "': " << EC.message() << "\n";
return OutputBuffer;
}
return std::move(*ReloadedBufferOrErr);
}
};
@ -745,6 +736,43 @@ std::unique_ptr<MemoryBuffer> ThinLTOCodeGenerator::codegen(Module &TheModule) {
return codegenModule(TheModule, *TMBuilder.create());
}
/// Write out the generated object file, either from CacheEntryPath or from
/// OutputBuffer, preferring hard-link when possible.
/// Returns the path to the generated file in SavedObjectsDirectoryPath.
static std::string writeGeneratedObject(int count, StringRef CacheEntryPath,
StringRef SavedObjectsDirectoryPath,
const MemoryBuffer &OutputBuffer) {
SmallString<128> OutputPath(SavedObjectsDirectoryPath);
llvm::sys::path::append(OutputPath, Twine(count) + ".thinlto.o");
OutputPath.c_str(); // Ensure the string is null terminated.
if (sys::fs::exists(OutputPath))
sys::fs::remove(OutputPath);
// We don't return a memory buffer to the linker, just a list of files.
if (!CacheEntryPath.empty()) {
// Cache is enabled, hard-link the entry (or copy if hard-link fails).
auto Err = sys::fs::create_hard_link(CacheEntryPath, OutputPath);
if (!Err)
return OutputPath.str();
// Hard linking failed, try to copy.
Err = sys::fs::copy_file(CacheEntryPath, OutputPath);
if (!Err)
return OutputPath.str();
// Copy failed (could be because the CacheEntry was removed from the cache
// in the meantime by another process), fall back and try to write down the
// buffer to the output.
errs() << "error: can't link or copy from cached entry '" << CacheEntryPath
<< "' to '" << OutputPath << "'\n";
}
// No cache entry, just write out the buffer.
std::error_code Err;
raw_fd_ostream OS(OutputPath, Err, sys::fs::F_None);
if (Err)
report_fatal_error("Can't open output '" + OutputPath + "'\n");
OS << OutputBuffer.getBuffer();
return OutputPath.str();
}
// Main entry point for the ThinLTO processing
void ThinLTOCodeGenerator::run() {
if (CodeGenOnly) {
@ -785,7 +813,16 @@ void ThinLTOCodeGenerator::run() {
// Prepare the resulting object vector
assert(ProducedBinaries.empty() && "The generator should not be reused");
ProducedBinaries.resize(Modules.size());
if (SavedObjectsDirectoryPath.empty())
ProducedBinaries.resize(Modules.size());
else {
sys::fs::create_directories(SavedObjectsDirectoryPath);
bool IsDir;
sys::fs::is_directory(SavedObjectsDirectoryPath, IsDir);
if (!IsDir)
report_fatal_error("Unexistent dir: '" + SavedObjectsDirectoryPath + "'");
ProducedBinaryFiles.resize(Modules.size());
}
// Prepare the module map.
auto ModuleMap = generateModuleMap(Modules);
@ -865,16 +902,22 @@ void ThinLTOCodeGenerator::run() {
ImportLists[ModuleIdentifier], ExportList,
ResolvedODR[ModuleIdentifier],
DefinedFunctions, GUIDPreservedSymbols);
auto CacheEntryPath = CacheEntry.getEntryPath();
{
auto ErrOrBuffer = CacheEntry.tryLoadingBuffer();
DEBUG(dbgs() << "Cache " << (ErrOrBuffer ? "hit" : "miss") << " '"
<< CacheEntry.getEntryPath() << "' for buffer " << count
<< " " << ModuleIdentifier << "\n");
<< CacheEntryPath << "' for buffer " << count << " "
<< ModuleIdentifier << "\n");
if (ErrOrBuffer) {
// Cache Hit!
ProducedBinaries[count] = std::move(ErrOrBuffer.get());
if (SavedObjectsDirectoryPath.empty())
ProducedBinaries[count] = std::move(ErrOrBuffer.get());
else
ProducedBinaryFiles[count] = writeGeneratedObject(
count, CacheEntryPath, SavedObjectsDirectoryPath,
*ErrOrBuffer.get());
return;
}
}
@ -903,8 +946,32 @@ void ThinLTOCodeGenerator::run() {
ModuleToDefinedGVSummaries[ModuleIdentifier], CacheOptions,
DisableCodeGen, SaveTempsDir, count);
OutputBuffer = CacheEntry.write(std::move(OutputBuffer));
ProducedBinaries[count] = std::move(OutputBuffer);
// Commit to the cache (if enabled)
CacheEntry.write(*OutputBuffer);
if (SavedObjectsDirectoryPath.empty()) {
// We need to generate a memory buffer for the linker.
if (!CacheEntryPath.empty()) {
// Cache is enabled, reload from the cache
// We do this to lower memory pressure: the buffer is on the heap
// and releasing it frees memory that can be used for the next input
// file. The final binary link will read from the VFS cache
// (hopefully!) or from disk if the memory pressure wasn't too high.
auto ReloadedBufferOrErr = CacheEntry.tryLoadingBuffer();
if (auto EC = ReloadedBufferOrErr.getError()) {
// On error, keeping the preexisting buffer and printing a
// diagnostic is more friendly than just crashing.
errs() << "error: can't reload cached file '" << CacheEntryPath
<< "': " << EC.message() << "\n";
} else {
OutputBuffer = std::move(*ReloadedBufferOrErr);
}
}
ProducedBinaries[count] = std::move(OutputBuffer);
return;
}
ProducedBinaryFiles[count] = writeGeneratedObject(
count, CacheEntryPath, SavedObjectsDirectoryPath, *OutputBuffer);
}, IndexCount);
}
}

View File

@ -285,6 +285,19 @@ std::error_code create_link(const Twine &to, const Twine &from) {
return std::error_code();
}
std::error_code create_hard_link(const Twine &to, const Twine &from) {
// Get arguments.
SmallString<128> from_storage;
SmallString<128> to_storage;
StringRef f = from.toNullTerminatedStringRef(from_storage);
StringRef t = to.toNullTerminatedStringRef(to_storage);
if (::link(t.begin(), f.begin()) == -1)
return std::error_code(errno, std::generic_category());
return std::error_code();
}
std::error_code remove(const Twine &path, bool IgnoreNonExisting) {
SmallString<128> path_storage;
StringRef p = path.toNullTerminatedStringRef(path_storage);

View File

@ -232,6 +232,10 @@ std::error_code create_link(const Twine &to, const Twine &from) {
return std::error_code();
}
// In this implementation hard-link creation is forwarded to create_link().
// NOTE(review): presumably create_link() already produces a hard link here
// rather than a symbolic link -- confirm against its implementation.
std::error_code create_hard_link(const Twine &to, const Twine &from) {
return create_link(to, from);
}
std::error_code remove(const Twine &path, bool IgnoreNonExisting) {
SmallVector<wchar_t, 128> path_utf16;

View File

@ -0,0 +1,30 @@
; RUN: opt -module-hash -module-summary %s -o %t.bc
; RUN: opt -module-hash -module-summary %p/Inputs/cache.ll -o %t2.bc
; Check that generating object files works without cache
; RUN: rm -Rf %t.thin.out
; RUN: llvm-lto -thinlto-save-objects=%t.thin.out -thinlto-action=run %t2.bc %t.bc -exported-symbol=main
; RUN: ls %t.thin.out | count 2
; Same with cache
; RUN: rm -Rf %t.thin.out
; RUN: rm -Rf %t.cache && mkdir %t.cache
; RUN: llvm-lto -thinlto-save-objects=%t.thin.out -thinlto-action=run %t2.bc %t.bc -exported-symbol=main -thinlto-cache-dir %t.cache
; RUN: ls %t.thin.out | count 2
; RUN: ls %t.cache | count 3
; Same with hot cache: the cache populated by the previous run is kept on
; purpose, so the objects are expected to come from the existing cache entries.
; RUN: rm -Rf %t.thin.out
; RUN: llvm-lto -thinlto-save-objects=%t.thin.out -thinlto-action=run %t2.bc %t.bc -exported-symbol=main -thinlto-cache-dir %t.cache
; RUN: ls %t.thin.out | count 2
; RUN: ls %t.cache | count 3
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.11.0"
define void @globalfunc() #0 {
entry:
ret void
}

View File

@ -130,6 +130,11 @@ static cl::opt<std::string> ThinLTOSaveTempsPrefix(
cl::desc("Save ThinLTO temp files using filenames created by adding "
"suffixes to the given file path prefix."));
static cl::opt<std::string> ThinLTOGeneratedObjectsDir(
"thinlto-save-objects",
cl::desc("Save ThinLTO generated object files using filenames created in "
"the given directory."));
static cl::opt<bool>
SaveModuleFile("save-merged-module", cl::init(false),
cl::desc("Write merged LTO module to file before CodeGen"));
@ -707,6 +712,13 @@ private:
if (!ThinLTOSaveTempsPrefix.empty())
ThinGenerator.setSaveTempsDir(ThinLTOSaveTempsPrefix);
if (!ThinLTOGeneratedObjectsDir.empty()) {
ThinGenerator.setGeneratedObjectsDirectory(ThinLTOGeneratedObjectsDir);
ThinGenerator.run();
return;
}
ThinGenerator.run();
auto &Binaries = ThinGenerator.getProducedBinaries();

View File

@ -488,6 +488,16 @@ LTOObjectBuffer thinlto_module_get_object(thinlto_code_gen_t cg,
MemBuffer->getBufferSize()};
}
// Number of on-disk object file paths recorded by the code generator.
unsigned int thinlto_module_get_num_object_files(thinlto_code_gen_t cg) {
  auto &ObjectFiles = unwrap(cg)->getProducedBinaryFiles();
  return static_cast<unsigned int>(ObjectFiles.size());
}
// Path of the index-th on-disk object file. The returned pointer refers to a
// string stored inside the code generator's list of produced files.
const char *thinlto_module_get_object_file(thinlto_code_gen_t cg,
                                           unsigned int index) {
  auto &ObjectFiles = unwrap(cg)->getProducedBinaryFiles();
  assert(index < ObjectFiles.size() && "Index overflow");
  return ObjectFiles[index].c_str();
}
void thinlto_codegen_disable_codegen(thinlto_code_gen_t cg,
lto_bool_t disable) {
unwrap(cg)->disableCodeGen(disable);
@ -551,6 +561,11 @@ void thinlto_codegen_set_savetemps_dir(thinlto_code_gen_t cg,
return unwrap(cg)->setSaveTempsDir(save_temps_dir);
}
// Forwards the directory to the code generator's
// setGeneratedObjectsDirectory(). Note: despite its name, save_temps_dir is
// the directory for generated object files, not the save-temps directory.
void thinlto_set_generated_objects_dir(thinlto_code_gen_t cg,
const char *save_temps_dir) {
unwrap(cg)->setGeneratedObjectsDirectory(save_temps_dir);
}
lto_bool_t thinlto_codegen_set_pic_model(thinlto_code_gen_t cg,
lto_codegen_model model) {
switch (model) {

View File

@ -64,4 +64,7 @@ thinlto_codegen_add_must_preserve_symbol
thinlto_codegen_add_cross_referenced_symbol
thinlto_codegen_set_final_cache_size_relative_to_available_space
thinlto_codegen_set_codegen_only
thinlto_codegen_disable_codegen
thinlto_codegen_disable_codegen
thinlto_module_get_num_object_files
thinlto_module_get_object_file
thinlto_set_generated_objects_dir