[OpenMP] Add better testing for the linker wrapper

The linker wrapper is used to perform linking and wrapping of embedded
device object files. Currently, its internals cannot be tested easily.
This patch adds the `--dry-run` and `--print-wrapped-module` options so
that tests can inspect the link jobs that would be run, along with the
wrapped code that will be created to register the binaries.

Reviewed By: JonChesterfield

Differential Revision: https://reviews.llvm.org/D124039
This commit is contained in:
Joseph Huber 2022-04-19 14:14:16 -04:00
parent 497a5f0415
commit 260c5df2d5
6 changed files with 128 additions and 42 deletions

Binary file not shown.

Binary file not shown.

View File

@ -0,0 +1,27 @@
// REQUIRES: x86-registered-target
// RUN: %clang -cc1 %s -emit-obj -o %t.o \
// RUN: -fembed-offload-object=%S/Inputs/dummy-elf.o,openmp,nvptx64-nvida-cuda,sm_70
// RUN: clang-linker-wrapper --print-wrapped-module --dry-run -linker-path /usr/bin/ld \
// RUN: -- %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=OPENMP
// OPENMP: @__start_omp_offloading_entries = external hidden constant %__tgt_offload_entry
// OPENMP-NEXT: @__stop_omp_offloading_entries = external hidden constant %__tgt_offload_entry
// OPENMP-NEXT: @__dummy.omp_offloading.entry = hidden constant [0 x %__tgt_offload_entry] zeroinitializer, section "omp_offloading_entries"
// OPENMP-NEXT: @.omp_offloading.device_image = internal unnamed_addr constant [0 x i8] zeroinitializer
// OPENMP-NEXT: @.omp_offloading.device_images = internal unnamed_addr constant [1 x %__tgt_device_image] [%__tgt_device_image { i8* getelementptr inbounds ([0 x i8], [0 x i8]* @.omp_offloading.device_image, i64 0, i64 0), i8* getelementptr inbounds ([0 x i8], [0 x i8]* @.omp_offloading.device_image, i64 0, i64 0), %__tgt_offload_entry* @__start_omp_offloading_entries, %__tgt_offload_entry* @__stop_omp_offloading_entries }]
// OPENMP-NEXT: @.omp_offloading.descriptor = internal constant %__tgt_bin_desc { i32 1, %__tgt_device_image* getelementptr inbounds ([1 x %__tgt_device_image], [1 x %__tgt_device_image]* @.omp_offloading.device_images, i64 0, i64 0), %__tgt_offload_entry* @__start_omp_offloading_entries, %__tgt_offload_entry* @__stop_omp_offloading_entries }
// OPENMP-NEXT: @llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 1, void ()* @.omp_offloading.descriptor_reg, i8* null }]
// OPENMP-NEXT: @llvm.global_dtors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 1, void ()* @.omp_offloading.descriptor_unreg, i8* null }]
// OPENMP: define internal void @.omp_offloading.descriptor_reg() section ".text.startup" {
// OPENMP-NEXT: entry:
// OPENMP-NEXT: call void @__tgt_register_lib(%__tgt_bin_desc* @.omp_offloading.descriptor)
// OPENMP-NEXT: ret void
// OPENMP-NEXT: }
// OPENMP: define internal void @.omp_offloading.descriptor_unreg() section ".text.startup" {
// OPENMP-NEXT: entry:
// OPENMP-NEXT: call void @__tgt_unregister_lib(%__tgt_bin_desc* @.omp_offloading.descriptor)
// OPENMP-NEXT: ret void
// OPENMP-NEXT: }

View File

@ -0,0 +1,40 @@
// REQUIRES: x86-registered-target
// RUN: %clang -cc1 %s -emit-obj -o %t.o \
// RUN: -fembed-offload-object=%S/Inputs/dummy-elf.o,openmp,nvptx64-nvida-cuda,sm_70 \
// RUN: -fembed-offload-object=%S/Inputs/dummy-elf.o,openmp,nvptx64-nvida-cuda,sm_70
// RUN: clang-linker-wrapper --dry-run -linker-path /usr/bin/ld -- %t.o -o a.out \
// RUN: 2>&1 | FileCheck %s --check-prefix=NVPTX_LINK
// NVPTX_LINK: nvlink{{.*}}-m64 -o {{.*}}.out -arch sm_70 {{.*}}.o {{.*}}.o
// RUN: %clang -cc1 %s -emit-obj -o %t.o \
// RUN: -fembed-offload-object=%S/Inputs/dummy-elf.o,openmp,amdgcn-amd-amdhsam,gfx908 \
// RUN: -fembed-offload-object=%S/Inputs/dummy-elf.o,openmp,amdgcn-amd-amdhsam,gfx908
// RUN: clang-linker-wrapper --dry-run -linker-path /usr/bin/ld -- %t.o -o a.out \
// RUN: 2>&1 | FileCheck %s --check-prefix=AMDGPU_LINK
// AMDGPU_LINK: lld{{.*}}-flavor gnu --no-undefined -shared -o {{.*}}.out {{.*}}.o {{.*}}.o
// RUN: %clang -cc1 %s -emit-obj -o %t.o \
// RUN: -fembed-offload-object=%S/Inputs/dummy-elf.o,openmp,x86_64-unknown-linux-gnu, \
// RUN: -fembed-offload-object=%S/Inputs/dummy-elf.o,openmp,x86_64-unknown-linux-gnu,
// RUN: clang-linker-wrapper --dry-run -linker-path /usr/bin/ld.lld -- %t.o -o a.out \
// RUN: 2>&1 | FileCheck %s --check-prefix=CPU_LINK
// CPU_LINK: ld.lld{{.*}}-m elf_x86_64 -shared -Bsymbolic -o {{.*}}.out {{.*}}.o {{.*}}.o
// RUN: %clang -cc1 %s -emit-obj -o %t.o
// RUN: clang-linker-wrapper --dry-run -linker-path /usr/bin/ld.lld -- -a -b -c %t.o -o a.out \
// RUN: 2>&1 | FileCheck %s --check-prefix=HOST_LINK
// HOST_LINK: ld.lld{{.*}}-a -b -c {{.*}}.o -o a.out
// RUN: %clang -cc1 %s -emit-obj -o %t.o \
// RUN: -fembed-offload-object=%S/Inputs/dummy-bc.bc,openmp,nvptx64-nvida-cuda,sm_70 \
// RUN: -fembed-offload-object=%S/Inputs/dummy-bc.bc,openmp,nvptx64-nvida-cuda,sm_70
// RUN: clang-linker-wrapper --dry-run -linker-path /usr/bin/ld -- %t.o -o a.out \
// RUN: 2>&1 | FileCheck %s --check-prefix=LTO
// LTO: ptxas{{.*}}-m64 -o {{.*}}.cubin -O2 --gpu-name sm_70 -c {{.*}}.s
// LTO: nvlink{{.*}}-m64 -o {{.*}}.out -arch sm_70 {{.*}}.cubin

View File

@ -64,7 +64,7 @@ tool_dirs = [config.clang_tools_dir, config.llvm_tools_dir]
tools = [
'apinotes-test', 'c-index-test', 'clang-diff', 'clang-format', 'clang-repl',
'clang-tblgen', 'clang-scan-deps', 'opt', 'llvm-ifs', 'yaml2obj',
'clang-tblgen', 'clang-scan-deps', 'opt', 'llvm-ifs', 'yaml2obj', 'clang-linker-wrapper',
ToolSubst('%clang_extdef_map', command=FindTool(
'clang-extdef-mapping'), unresolved='ignore'),
ToolSubst('%clang_dxc', command=config.clang,

View File

@ -92,6 +92,16 @@ static cl::opt<bool> EmbedBitcode(
cl::desc("Embed linked bitcode instead of an executable device image"),
cl::init(false), cl::cat(ClangLinkerWrapperCategory));
/// `--dry-run`: when set, executeCommands prints each command line but does
/// not run it, and findProgram falls back to the bare program name if the tool
/// cannot be located. Defaults to false.
static cl::opt<bool> DryRun(
"dry-run", cl::ZeroOrMore,
cl::desc("List the linker commands to be run without executing them"),
cl::init(false), cl::cat(ClangLinkerWrapperCategory));
/// `--print-wrapped-module`: when set, wrapDeviceImages writes the wrapped
/// module's IR to llvm::errs() before compiling it. Intended for testing.
/// Defaults to false.
static cl::opt<bool>
PrintWrappedModule("print-wrapped-module", cl::ZeroOrMore,
cl::desc("Print the wrapped module's IR for testing"),
cl::init(false), cl::cat(ClangLinkerWrapperCategory));
static cl::opt<std::string>
HostTriple("host-triple", cl::ZeroOrMore,
cl::desc("Triple to use for the host compilation"),
@ -233,17 +243,40 @@ Error createOutputFile(const Twine &Prefix, StringRef Extension,
return Error::success();
}
/// Run the program at \p ExecutablePath with the argument list \p Args.
///
/// The command line is echoed when either verbose output or a dry run was
/// requested. During a dry run nothing is executed and success is returned.
/// \returns an error naming the executable if it exits with a non-zero status.
Error executeCommands(StringRef ExecutablePath, ArrayRef<StringRef> Args) {
  const bool ShouldPrint = Verbose || DryRun;
  if (ShouldPrint)
    printCommands(Args);

  // A dry run only lists the commands; it never launches them.
  if (DryRun)
    return Error::success();

  if (sys::ExecuteAndWait(ExecutablePath, Args) == 0)
    return Error::success();

  return createStringError(inconvertibleErrorCode(),
                           "'" + sys::path::filename(ExecutablePath) + "'" +
                               " failed");
}
/// Locate the program \p Name, looking in \p Paths first and then falling back
/// to the default search performed by sys::findProgramByName.
///
/// If the program cannot be found during a dry run, the bare \p Name is
/// returned so the command can still be printed; otherwise an error is
/// returned.
Expected<std::string> findProgram(StringRef Name, ArrayRef<StringRef> Paths) {
  ErrorOr<std::string> Found = sys::findProgramByName(Name, Paths);
  if (!Found)
    Found = sys::findProgramByName(Name);

  if (Found)
    return *Found;

  // Nothing is executed in a dry run, so an unresolved name is acceptable.
  if (DryRun)
    return Name.str();

  return createStringError(Found.getError(),
                           "Unable to find '" + Name + "' in path");
}
/// Invoke the host linker at \p LinkerPath with the arguments \p Args.
///
/// The linker path is passed as the first element of the argument list. All
/// printing and execution goes through executeCommands so that the command is
/// printed at most once, the linker is launched at most once, and `--dry-run`
/// is honoured.
/// \returns the error produced by executeCommands, or success.
Error runLinker(std::string &LinkerPath, SmallVectorImpl<std::string> &Args) {
  std::vector<StringRef> LinkerArgs;
  LinkerArgs.reserve(Args.size() + 1);
  LinkerArgs.push_back(LinkerPath);
  for (const std::string &Arg : Args)
    LinkerArgs.push_back(Arg);

  // Do not call sys::ExecuteAndWait directly here: that would run the linker
  // even under --dry-run and a second time via executeCommands.
  if (Error Err = executeCommands(LinkerPath, LinkerArgs))
    return Err;
  return Error::success();
}
@ -379,12 +412,10 @@ extractFromBinary(const ObjectFile &Obj,
// We will use llvm-strip to remove the now unneeded section containing the
// offloading code.
ErrorOr<std::string> StripPath =
sys::findProgramByName("llvm-strip", {getMainExecutable("llvm-strip")});
Expected<std::string> StripPath =
findProgram("llvm-strip", {getMainExecutable("llvm-strip")});
if (!StripPath)
StripPath = sys::findProgramByName("llvm-strip");
if (!StripPath)
return None;
return StripPath.takeError();
SmallString<128> TempFile;
if (Error Err = createOutputFile(Prefix + "-host", Extension, TempFile))
@ -401,11 +432,8 @@ extractFromBinary(const ObjectFile &Obj,
StripArgs.push_back("-o");
StripArgs.push_back(TempFile);
if (Verbose)
printCommands(StripArgs);
if (sys::ExecuteAndWait(*StripPath, StripArgs))
return createStringError(inconvertibleErrorCode(), "'llvm-strip' failed");
if (Error Err = executeCommands(*StripPath, StripArgs))
return Err;
return static_cast<std::string>(TempFile);
}
@ -569,13 +597,9 @@ namespace nvptx {
Expected<std::string> assemble(StringRef InputFile, Triple TheTriple,
StringRef Arch) {
// NVPTX uses the ptxas binary to create device object files.
ErrorOr<std::string> PtxasPath =
sys::findProgramByName("ptxas", {CudaBinaryPath});
Expected<std::string> PtxasPath = findProgram("ptxas", {CudaBinaryPath});
if (!PtxasPath)
PtxasPath = sys::findProgramByName("ptxas");
if (!PtxasPath)
return createStringError(PtxasPath.getError(),
"Unable to find 'ptxas' in path");
return PtxasPath.takeError();
// Create a new file to write the linked device image to.
SmallString<128> TempFile;
@ -609,8 +633,8 @@ Expected<std::string> assemble(StringRef InputFile, Triple TheTriple,
if (Verbose)
printCommands(CmdArgs);
if (sys::ExecuteAndWait(*PtxasPath, CmdArgs))
return createStringError(inconvertibleErrorCode(), "'ptxas' failed");
if (Error Err = executeCommands(*PtxasPath, CmdArgs))
return Err;
return static_cast<std::string>(TempFile);
}
@ -618,13 +642,9 @@ Expected<std::string> assemble(StringRef InputFile, Triple TheTriple,
Expected<std::string> link(ArrayRef<std::string> InputFiles, Triple TheTriple,
StringRef Arch) {
// NVPTX uses the nvlink binary to link device object files.
ErrorOr<std::string> NvlinkPath =
sys::findProgramByName("nvlink", {CudaBinaryPath});
Expected<std::string> NvlinkPath = findProgram("nvlink", {CudaBinaryPath});
if (!NvlinkPath)
NvlinkPath = sys::findProgramByName("nvlink");
if (!NvlinkPath)
return createStringError(NvlinkPath.getError(),
"Unable to find 'nvlink' in path");
return NvlinkPath.takeError();
// Create a new file to write the linked device image to.
SmallString<128> TempFile;
@ -653,8 +673,8 @@ Expected<std::string> link(ArrayRef<std::string> InputFiles, Triple TheTriple,
if (Verbose)
printCommands(CmdArgs);
if (sys::ExecuteAndWait(*NvlinkPath, CmdArgs))
return createStringError(inconvertibleErrorCode(), "'nvlink' failed");
if (Error Err = executeCommands(*NvlinkPath, CmdArgs))
return Err;
return static_cast<std::string>(TempFile);
}
@ -663,13 +683,9 @@ namespace amdgcn {
Expected<std::string> link(ArrayRef<std::string> InputFiles, Triple TheTriple,
StringRef Arch) {
// AMDGPU uses lld to link device object files.
ErrorOr<std::string> LLDPath =
sys::findProgramByName("lld", {getMainExecutable("lld")});
// lld is located relative to the main executable (as llvm-strip is), not in
// CudaBinaryPath, which is where the NVPTX tools ptxas and nvlink are searched.
Expected<std::string> LLDPath =
    findProgram("lld", {getMainExecutable("lld")});
if (!LLDPath)
LLDPath = sys::findProgramByName("lld");
if (!LLDPath)
return createStringError(LLDPath.getError(),
"Unable to find 'lld' in path");
return LLDPath.takeError();
// Create a new file to write the linked device image to.
SmallString<128> TempFile;
@ -694,8 +710,8 @@ Expected<std::string> link(ArrayRef<std::string> InputFiles, Triple TheTriple,
if (Verbose)
printCommands(CmdArgs);
if (sys::ExecuteAndWait(*LLDPath, CmdArgs))
return createStringError(inconvertibleErrorCode(), "'lld' failed");
if (Error Err = executeCommands(*LLDPath, CmdArgs))
return Err;
return static_cast<std::string>(TempFile);
}
@ -774,8 +790,8 @@ Expected<std::string> link(ArrayRef<std::string> InputFiles, Triple TheTriple,
if (Verbose)
printCommands(CmdArgs);
if (sys::ExecuteAndWait(LinkerUserPath, CmdArgs))
return createStringError(inconvertibleErrorCode(), "'linker' failed");
if (Error Err = executeCommands(LinkerUserPath, CmdArgs))
return Err;
return static_cast<std::string>(TempFile);
}
@ -1185,6 +1201,9 @@ Expected<std::string> wrapDeviceImages(ArrayRef<std::string> Images) {
if (Error Err = wrapBinaries(M, ImagesToWrap))
return std::move(Err);
if (PrintWrappedModule)
llvm::errs() << M;
return compileModule(M);
}