forked from OSchip/llvm-project
[OpenMP] Integrate OpenMP target region cubin into host binary
Summary: OpenMP device offloading code generation produces a cubin file which is then integrated in the host binary using the host linker. Reviewers: arpith-jacob, caomhin, carlo.bertolli, ABataev, Hahnfeld, jlebar, rnk, hfinkel, tstellar Reviewed By: hfinkel Subscribers: sfantao, rnk, rengolin, cfe-commits Differential Revision: https://reviews.llvm.org/D29654 llvm-svn: 310291
This commit is contained in:
parent
9b8ef6e55a
commit
4cdba82ee0
|
@ -524,7 +524,7 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
|
|||
auto &CudaTC = ToolChains[CudaTriple.str() + "/" + HostTriple.str()];
|
||||
if (!CudaTC) {
|
||||
CudaTC = llvm::make_unique<toolchains::CudaToolChain>(
|
||||
*this, CudaTriple, *HostTC, C.getInputArgs());
|
||||
*this, CudaTriple, *HostTC, C.getInputArgs(), Action::OFK_Cuda);
|
||||
}
|
||||
C.addOffloadDeviceToolChain(CudaTC.get(), Action::OFK_Cuda);
|
||||
}
|
||||
|
@ -582,7 +582,7 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
|
|||
ToolChains[TT.str() + "/" + HostTC->getTriple().str()];
|
||||
if (!CudaTC)
|
||||
CudaTC = llvm::make_unique<toolchains::CudaToolChain>(
|
||||
*this, TT, *HostTC, C.getInputArgs());
|
||||
*this, TT, *HostTC, C.getInputArgs(), Action::OFK_OpenMP);
|
||||
TC = CudaTC.get();
|
||||
} else
|
||||
TC = &getToolChain(C.getInputArgs(), TT);
|
||||
|
|
|
@ -5316,7 +5316,13 @@ void OffloadBundler::ConstructJobMultipleOutputs(
|
|||
for (unsigned I = 0; I < Outputs.size(); ++I) {
|
||||
if (I)
|
||||
UB += ',';
|
||||
UB += Outputs[I].getFilename();
|
||||
SmallString<256> OutputFileName(Outputs[I].getFilename());
|
||||
// Change extension of target files for OpenMP offloading
|
||||
// to NVIDIA GPUs.
|
||||
if (DepInfo[I].DependentToolChain->getTriple().isNVPTX() &&
|
||||
JA.isOffloading(Action::OFK_OpenMP))
|
||||
llvm::sys::path::replace_extension(OutputFileName, "cubin");
|
||||
UB += OutputFileName;
|
||||
}
|
||||
CmdArgs.push_back(TCArgs.MakeArgString(UB));
|
||||
CmdArgs.push_back("-unbundle");
|
||||
|
|
|
@ -1025,3 +1025,128 @@ void tools::AddRunTimeLibs(const ToolChain &TC, const Driver &D,
|
|||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/// Add OpenMP linker script arguments at the end of the argument list so that
|
||||
/// the fat binary is built by embedding each of the device images into the
|
||||
/// host. The linker script also defines a few symbols required by the code
|
||||
/// generation so that the images can be easily retrieved at runtime by the
|
||||
/// offloading library. This should be used only in tool chains that support
|
||||
/// linker scripts.
|
||||
void tools::AddOpenMPLinkerScript(const ToolChain &TC, Compilation &C,
|
||||
const InputInfo &Output,
|
||||
const InputInfoList &Inputs,
|
||||
const ArgList &Args, ArgStringList &CmdArgs,
|
||||
const JobAction &JA) {
|
||||
|
||||
// If this is not an OpenMP host toolchain, we don't need to do anything.
|
||||
if (!JA.isHostOffloading(Action::OFK_OpenMP))
|
||||
return;
|
||||
|
||||
// Create temporary linker script. Keep it if save-temps is enabled.
|
||||
const char *LKS;
|
||||
SmallString<256> Name = llvm::sys::path::filename(Output.getFilename());
|
||||
if (C.getDriver().isSaveTempsEnabled()) {
|
||||
llvm::sys::path::replace_extension(Name, "lk");
|
||||
LKS = C.getArgs().MakeArgString(Name.c_str());
|
||||
} else {
|
||||
llvm::sys::path::replace_extension(Name, "");
|
||||
Name = C.getDriver().GetTemporaryPath(Name, "lk");
|
||||
LKS = C.addTempFile(C.getArgs().MakeArgString(Name.c_str()));
|
||||
}
|
||||
|
||||
// Add linker script option to the command.
|
||||
CmdArgs.push_back("-T");
|
||||
CmdArgs.push_back(LKS);
|
||||
|
||||
// Create a buffer to write the contents of the linker script.
|
||||
std::string LksBuffer;
|
||||
llvm::raw_string_ostream LksStream(LksBuffer);
|
||||
|
||||
// Get the OpenMP offload tool chains so that we can extract the triple
|
||||
// associated with each device input.
|
||||
auto OpenMPToolChains = C.getOffloadToolChains<Action::OFK_OpenMP>();
|
||||
assert(OpenMPToolChains.first != OpenMPToolChains.second &&
|
||||
"No OpenMP toolchains??");
|
||||
|
||||
// Track the input file name and device triple in order to build the script,
|
||||
// inserting binaries in the designated sections.
|
||||
SmallVector<std::pair<std::string, const char *>, 8> InputBinaryInfo;
|
||||
|
||||
// Add commands to embed target binaries. We ensure that each section and
|
||||
// image is 16-byte aligned. This is not mandatory, but increases the
|
||||
// likelihood of data to be aligned with a cache block in several main host
|
||||
// machines.
|
||||
LksStream << "/*\n";
|
||||
LksStream << " OpenMP Offload Linker Script\n";
|
||||
LksStream << " *** Automatically generated by Clang ***\n";
|
||||
LksStream << "*/\n";
|
||||
LksStream << "TARGET(binary)\n";
|
||||
auto DTC = OpenMPToolChains.first;
|
||||
for (auto &II : Inputs) {
|
||||
const Action *A = II.getAction();
|
||||
// Is this a device linking action?
|
||||
if (A && isa<LinkJobAction>(A) &&
|
||||
A->isDeviceOffloading(Action::OFK_OpenMP)) {
|
||||
assert(DTC != OpenMPToolChains.second &&
|
||||
"More device inputs than device toolchains??");
|
||||
InputBinaryInfo.push_back(std::make_pair(
|
||||
DTC->second->getTriple().normalize(), II.getFilename()));
|
||||
++DTC;
|
||||
LksStream << "INPUT(" << II.getFilename() << ")\n";
|
||||
}
|
||||
}
|
||||
|
||||
assert(DTC == OpenMPToolChains.second &&
|
||||
"Less device inputs than device toolchains??");
|
||||
|
||||
LksStream << "SECTIONS\n";
|
||||
LksStream << "{\n";
|
||||
|
||||
// Put each target binary into a separate section.
|
||||
for (const auto &BI : InputBinaryInfo) {
|
||||
LksStream << " .omp_offloading." << BI.first << " :\n";
|
||||
LksStream << " ALIGN(0x10)\n";
|
||||
LksStream << " {\n";
|
||||
LksStream << " PROVIDE_HIDDEN(.omp_offloading.img_start." << BI.first
|
||||
<< " = .);\n";
|
||||
LksStream << " " << BI.second << "\n";
|
||||
LksStream << " PROVIDE_HIDDEN(.omp_offloading.img_end." << BI.first
|
||||
<< " = .);\n";
|
||||
LksStream << " }\n";
|
||||
}
|
||||
|
||||
// Add commands to define host entries begin and end. We use 1-byte subalign
|
||||
// so that the linker does not add any padding and the elements in this
|
||||
// section form an array.
|
||||
LksStream << " .omp_offloading.entries :\n";
|
||||
LksStream << " ALIGN(0x10)\n";
|
||||
LksStream << " SUBALIGN(0x01)\n";
|
||||
LksStream << " {\n";
|
||||
LksStream << " PROVIDE_HIDDEN(.omp_offloading.entries_begin = .);\n";
|
||||
LksStream << " *(.omp_offloading.entries)\n";
|
||||
LksStream << " PROVIDE_HIDDEN(.omp_offloading.entries_end = .);\n";
|
||||
LksStream << " }\n";
|
||||
LksStream << "}\n";
|
||||
LksStream << "INSERT BEFORE .data\n";
|
||||
LksStream.flush();
|
||||
|
||||
// Dump the contents of the linker script if the user requested that. We
|
||||
// support this option to enable testing of behavior with -###.
|
||||
if (C.getArgs().hasArg(options::OPT_fopenmp_dump_offload_linker_script))
|
||||
llvm::errs() << LksBuffer;
|
||||
|
||||
// If this is a dry run, do not create the linker script file.
|
||||
if (C.getArgs().hasArg(options::OPT__HASH_HASH_HASH))
|
||||
return;
|
||||
|
||||
// Open script file and write the contents.
|
||||
std::error_code EC;
|
||||
llvm::raw_fd_ostream Lksf(LKS, EC, llvm::sys::fs::F_None);
|
||||
|
||||
if (EC) {
|
||||
C.getDriver().Diag(clang::diag::err_unable_to_make_temp) << EC.message();
|
||||
return;
|
||||
}
|
||||
|
||||
Lksf << LksBuffer;
|
||||
}
|
||||
|
|
|
@ -39,6 +39,13 @@ void AddRunTimeLibs(const ToolChain &TC, const Driver &D,
|
|||
llvm::opt::ArgStringList &CmdArgs,
|
||||
const llvm::opt::ArgList &Args);
|
||||
|
||||
/// Add OpenMP linker script arguments at the end of \p CmdArgs so that the
/// host linker embeds each OpenMP device image into the host binary. Does
/// nothing unless \p JA is an OpenMP host offloading action. Should be used
/// only by tool chains whose linker supports linker scripts.
void AddOpenMPLinkerScript(const ToolChain &TC, Compilation &C,
|
||||
const InputInfo &Output,
|
||||
const InputInfoList &Inputs,
|
||||
const llvm::opt::ArgList &Args,
|
||||
llvm::opt::ArgStringList &CmdArgs,
|
||||
const JobAction &JA);
|
||||
|
||||
const char *SplitDebugName(const llvm::opt::ArgList &Args,
|
||||
const InputInfo &Input);
|
||||
|
||||
|
|
|
@ -9,7 +9,9 @@
|
|||
|
||||
#include "Cuda.h"
|
||||
#include "InputInfo.h"
|
||||
#include "CommonArgs.h"
|
||||
#include "clang/Basic/Cuda.h"
|
||||
#include "clang/Config/config.h"
|
||||
#include "clang/Basic/VirtualFileSystem.h"
|
||||
#include "clang/Driver/Compilation.h"
|
||||
#include "clang/Driver/Driver.h"
|
||||
|
@ -275,7 +277,10 @@ void NVPTX::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
|
|||
CmdArgs.push_back("--gpu-name");
|
||||
CmdArgs.push_back(Args.MakeArgString(CudaArchToString(gpu_arch)));
|
||||
CmdArgs.push_back("--output-file");
|
||||
CmdArgs.push_back(Args.MakeArgString(Output.getFilename()));
|
||||
SmallString<256> OutputFileName(Output.getFilename());
|
||||
if (JA.isOffloading(Action::OFK_OpenMP))
|
||||
llvm::sys::path::replace_extension(OutputFileName, "cubin");
|
||||
CmdArgs.push_back(Args.MakeArgString(OutputFileName));
|
||||
for (const auto& II : Inputs)
|
||||
CmdArgs.push_back(Args.MakeArgString(II.getFilename()));
|
||||
|
||||
|
@ -334,14 +339,92 @@ void NVPTX::Linker::ConstructJob(Compilation &C, const JobAction &JA,
|
|||
C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
|
||||
}
|
||||
|
||||
/// Construct the nvlink command that links the OpenMP device cubin files.
///
/// The command receives "-o <output>" (when \p Output is a file), optional
/// "-g" / "-v", "-arch <value of -march=>", library search paths taken from
/// LIBRARY_PATH and the default clang library directory,
/// "-lomptarget-nvptx", and one ".cubin" name per file input. The resulting
/// image is later embedded in the host binary by the host linker.
///
/// \param C             Compilation the command is added to.
/// \param JA            Device link action; must not be an OpenMP host action.
/// \param Output        Output of the link (a file name or nothing).
/// \param Inputs        Inputs of the link; LLVM IR/bitcode inputs are
///                      diagnosed and non-file inputs are ignored.
/// \param Args          Tool chain arguments (-g, -v, -march= are read).
/// \param LinkingOutput Unused.
void NVPTX::OpenMPLinker::ConstructJob(Compilation &C, const JobAction &JA,
                                       const InputInfo &Output,
                                       const InputInfoList &Inputs,
                                       const ArgList &Args,
                                       const char *LinkingOutput) const {
  const auto &TC =
      static_cast<const toolchains::CudaToolChain &>(getToolChain());
  assert(TC.getTriple().isNVPTX() && "Wrong platform");

  ArgStringList CmdArgs;

  // OpenMP uses nvlink to link cubin files. The result will be embedded in the
  // host binary by the host linker.
  assert(!JA.isHostOffloading(Action::OFK_OpenMP) &&
         "CUDA toolchain not expected for an OpenMP host device.");

  if (Output.isFilename()) {
    CmdArgs.push_back("-o");
    CmdArgs.push_back(Output.getFilename());
  } else
    assert(Output.isNothing() && "Invalid output.");

  if (Args.hasArg(options::OPT_g_Flag))
    CmdArgs.push_back("-g");

  if (Args.hasArg(options::OPT_v))
    CmdArgs.push_back("-v");

  // The last -march= value selects the GPU architecture passed to nvlink.
  StringRef GPUArch =
      Args.getLastArgValue(options::OPT_march_EQ);
  assert(!GPUArch.empty() && "At least one GPU Arch required for nvlink.");

  CmdArgs.push_back("-arch");
  CmdArgs.push_back(Args.MakeArgString(GPUArch));

  // Add paths specified in LIBRARY_PATH environment variable as -L options.
  addDirectoryList(Args, CmdArgs, "-L", "LIBRARY_PATH");

  // Add paths for the default clang library path.
  SmallString<256> DefaultLibPath =
      llvm::sys::path::parent_path(TC.getDriver().Dir);
  llvm::sys::path::append(DefaultLibPath, "lib" CLANG_LIBDIR_SUFFIX);
  CmdArgs.push_back(Args.MakeArgString(Twine("-L") + DefaultLibPath));

  // Add linking against library implementing OpenMP calls on NVPTX target.
  CmdArgs.push_back("-lomptarget-nvptx");

  for (const auto &II : Inputs) {
    // This linker cannot consume LLVM IR or bitcode; diagnose and skip it.
    if (II.getType() == types::TY_LLVM_IR ||
        II.getType() == types::TY_LTO_IR ||
        II.getType() == types::TY_LTO_BC ||
        II.getType() == types::TY_LLVM_BC) {
      C.getDriver().Diag(diag::err_drv_no_linker_llvm_support)
          << getToolChain().getTripleString();
      continue;
    }

    // Currently, we only pass the input files to the linker, we do not pass
    // any libraries that may be valid only for the host.
    if (!II.isFilename())
      continue;

    // The link consumes the input's base name with a ".cubin" extension
    // (directory stripped); that name is also registered as a temp file.
    SmallString<256> Name = llvm::sys::path::filename(II.getFilename());
    llvm::sys::path::replace_extension(Name, "cubin");

    const char *CubinF =
        C.addTempFile(C.getArgs().MakeArgString(Name));

    CmdArgs.push_back(CubinF);
  }

  // NOTE(review): the helper returns early unless JA is an OpenMP host
  // offloading action, which the assert above rules out, so this call looks
  // like a no-op for device links -- confirm before removing.
  AddOpenMPLinkerScript(getToolChain(), C, Output, Inputs, Args, CmdArgs, JA);

  const char *Exec =
      Args.MakeArgString(getToolChain().GetProgramPath("nvlink"));
  C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
}
|
||||
|
||||
/// CUDA toolchain. Our assembler is ptxas, and our "linker" is fatbinary,
|
||||
/// which isn't properly a linker but nonetheless performs the step of stitching
|
||||
/// together object files from the assembler into a single blob. For OpenMP
/// offloading the linker is nvlink instead (see CudaToolChain::buildLinker).
|
||||
|
||||
CudaToolChain::CudaToolChain(const Driver &D, const llvm::Triple &Triple,
|
||||
const ToolChain &HostTC, const ArgList &Args)
|
||||
const ToolChain &HostTC, const ArgList &Args,
|
||||
const Action::OffloadKind OK)
|
||||
: ToolChain(D, Triple, Args), HostTC(HostTC),
|
||||
CudaInstallation(D, HostTC.getTriple(), Args) {
|
||||
CudaInstallation(D, HostTC.getTriple(), Args), OK(OK) {
|
||||
if (CudaInstallation.isValid())
|
||||
getProgramPaths().push_back(CudaInstallation.getBinPath());
|
||||
}
|
||||
|
@ -484,6 +567,8 @@ Tool *CudaToolChain::buildAssembler() const {
|
|||
}
|
||||
|
||||
/// Create the linker tool for this tool chain: the OpenMP linker when the
/// tool chain was created for OpenMP offloading, the regular NVPTX linker for
/// every other offload kind. The tool is allocated with `new` and returned as
/// a raw pointer.
Tool *CudaToolChain::buildLinker() const {
  const bool ForOpenMP = (OK == Action::OFK_OpenMP);
  if (!ForOpenMP)
    return new tools::NVPTX::Linker(*this);
  return new tools::NVPTX::OpenMPLinker(*this);
}
|
||||
|
||||
|
|
|
@ -112,6 +112,20 @@ class LLVM_LIBRARY_VISIBILITY Linker : public Tool {
|
|||
const char *LinkingOutput) const override;
|
||||
};
|
||||
|
||||
/// Linker tool used for OpenMP offloading to NVPTX. Its ConstructJob builds
/// an nvlink command, so the tool's short name is "nvlink" (it previously
/// said "fatbinary", which is the program run by the CUDA Linker tool).
class LLVM_LIBRARY_VISIBILITY OpenMPLinker : public Tool {
public:
  OpenMPLinker(const ToolChain &TC)
      : Tool("NVPTX::OpenMPLinker", "nvlink", TC, RF_Full, llvm::sys::WEM_UTF8,
             "--options-file") {}

  /// This tool has no integrated preprocessor.
  bool hasIntegratedCPP() const override { return false; }

  /// Build the link command for \p JA and add it to \p C.
  void ConstructJob(Compilation &C, const JobAction &JA,
                    const InputInfo &Output, const InputInfoList &Inputs,
                    const llvm::opt::ArgList &TCArgs,
                    const char *LinkingOutput) const override;
};
|
||||
|
||||
} // end namespace NVPTX
|
||||
} // end namespace tools
|
||||
|
||||
|
@ -120,7 +134,8 @@ namespace toolchains {
|
|||
class LLVM_LIBRARY_VISIBILITY CudaToolChain : public ToolChain {
|
||||
public:
|
||||
CudaToolChain(const Driver &D, const llvm::Triple &Triple,
|
||||
const ToolChain &HostTC, const llvm::opt::ArgList &Args);
|
||||
const ToolChain &HostTC, const llvm::opt::ArgList &Args,
|
||||
const Action::OffloadKind OK);
|
||||
|
||||
virtual const llvm::Triple *getAuxTriple() const override {
|
||||
return &HostTC.getTriple();
|
||||
|
@ -169,6 +184,9 @@ public:
|
|||
protected:
|
||||
Tool *buildAssembler() const override; // ptxas
|
||||
Tool *buildLinker() const override; // fatbinary (ok, not really a linker)
|
||||
|
||||
private:
|
||||
const Action::OffloadKind OK;
|
||||
};
|
||||
|
||||
} // end namespace toolchains
|
||||
|
|
|
@ -204,131 +204,6 @@ void tools::gcc::Linker::RenderExtraToolArgs(const JobAction &JA,
|
|||
// The types are (hopefully) good enough.
|
||||
}
|
||||
|
||||
/// Add OpenMP linker script arguments at the end of the argument list so that
|
||||
/// the fat binary is built by embedding each of the device images into the
|
||||
/// host. The linker script also defines a few symbols required by the code
|
||||
/// generation so that the images can be easily retrieved at runtime by the
|
||||
/// offloading library. This should be used only in tool chains that support
|
||||
/// linker scripts.
|
||||
static void AddOpenMPLinkerScript(const ToolChain &TC, Compilation &C,
|
||||
const InputInfo &Output,
|
||||
const InputInfoList &Inputs,
|
||||
const ArgList &Args, ArgStringList &CmdArgs,
|
||||
const JobAction &JA) {
|
||||
|
||||
// If this is not an OpenMP host toolchain, we don't need to do anything.
|
||||
if (!JA.isHostOffloading(Action::OFK_OpenMP))
|
||||
return;
|
||||
|
||||
// Create temporary linker script. Keep it if save-temps is enabled.
|
||||
const char *LKS;
|
||||
SmallString<256> Name = llvm::sys::path::filename(Output.getFilename());
|
||||
if (C.getDriver().isSaveTempsEnabled()) {
|
||||
llvm::sys::path::replace_extension(Name, "lk");
|
||||
LKS = C.getArgs().MakeArgString(Name.c_str());
|
||||
} else {
|
||||
llvm::sys::path::replace_extension(Name, "");
|
||||
Name = C.getDriver().GetTemporaryPath(Name, "lk");
|
||||
LKS = C.addTempFile(C.getArgs().MakeArgString(Name.c_str()));
|
||||
}
|
||||
|
||||
// Add linker script option to the command.
|
||||
CmdArgs.push_back("-T");
|
||||
CmdArgs.push_back(LKS);
|
||||
|
||||
// Create a buffer to write the contents of the linker script.
|
||||
std::string LksBuffer;
|
||||
llvm::raw_string_ostream LksStream(LksBuffer);
|
||||
|
||||
// Get the OpenMP offload tool chains so that we can extract the triple
|
||||
// associated with each device input.
|
||||
auto OpenMPToolChains = C.getOffloadToolChains<Action::OFK_OpenMP>();
|
||||
assert(OpenMPToolChains.first != OpenMPToolChains.second &&
|
||||
"No OpenMP toolchains??");
|
||||
|
||||
// Track the input file name and device triple in order to build the script,
|
||||
// inserting binaries in the designated sections.
|
||||
SmallVector<std::pair<std::string, const char *>, 8> InputBinaryInfo;
|
||||
|
||||
// Add commands to embed target binaries. We ensure that each section and
|
||||
// image is 16-byte aligned. This is not mandatory, but increases the
|
||||
// likelihood of data to be aligned with a cache block in several main host
|
||||
// machines.
|
||||
LksStream << "/*\n";
|
||||
LksStream << " OpenMP Offload Linker Script\n";
|
||||
LksStream << " *** Automatically generated by Clang ***\n";
|
||||
LksStream << "*/\n";
|
||||
LksStream << "TARGET(binary)\n";
|
||||
auto DTC = OpenMPToolChains.first;
|
||||
for (auto &II : Inputs) {
|
||||
const Action *A = II.getAction();
|
||||
// Is this a device linking action?
|
||||
if (A && isa<LinkJobAction>(A) &&
|
||||
A->isDeviceOffloading(Action::OFK_OpenMP)) {
|
||||
assert(DTC != OpenMPToolChains.second &&
|
||||
"More device inputs than device toolchains??");
|
||||
InputBinaryInfo.push_back(std::make_pair(
|
||||
DTC->second->getTriple().normalize(), II.getFilename()));
|
||||
++DTC;
|
||||
LksStream << "INPUT(" << II.getFilename() << ")\n";
|
||||
}
|
||||
}
|
||||
|
||||
assert(DTC == OpenMPToolChains.second &&
|
||||
"Less device inputs than device toolchains??");
|
||||
|
||||
LksStream << "SECTIONS\n";
|
||||
LksStream << "{\n";
|
||||
|
||||
// Put each target binary into a separate section.
|
||||
for (const auto &BI : InputBinaryInfo) {
|
||||
LksStream << " .omp_offloading." << BI.first << " :\n";
|
||||
LksStream << " ALIGN(0x10)\n";
|
||||
LksStream << " {\n";
|
||||
LksStream << " PROVIDE_HIDDEN(.omp_offloading.img_start." << BI.first
|
||||
<< " = .);\n";
|
||||
LksStream << " " << BI.second << "\n";
|
||||
LksStream << " PROVIDE_HIDDEN(.omp_offloading.img_end." << BI.first
|
||||
<< " = .);\n";
|
||||
LksStream << " }\n";
|
||||
}
|
||||
|
||||
// Add commands to define host entries begin and end. We use 1-byte subalign
|
||||
// so that the linker does not add any padding and the elements in this
|
||||
// section form an array.
|
||||
LksStream << " .omp_offloading.entries :\n";
|
||||
LksStream << " ALIGN(0x10)\n";
|
||||
LksStream << " SUBALIGN(0x01)\n";
|
||||
LksStream << " {\n";
|
||||
LksStream << " PROVIDE_HIDDEN(.omp_offloading.entries_begin = .);\n";
|
||||
LksStream << " *(.omp_offloading.entries)\n";
|
||||
LksStream << " PROVIDE_HIDDEN(.omp_offloading.entries_end = .);\n";
|
||||
LksStream << " }\n";
|
||||
LksStream << "}\n";
|
||||
LksStream << "INSERT BEFORE .data\n";
|
||||
LksStream.flush();
|
||||
|
||||
// Dump the contents of the linker script if the user requested that. We
|
||||
// support this option to enable testing of behavior with -###.
|
||||
if (C.getArgs().hasArg(options::OPT_fopenmp_dump_offload_linker_script))
|
||||
llvm::errs() << LksBuffer;
|
||||
|
||||
// If this is a dry run, do not create the linker script file.
|
||||
if (C.getArgs().hasArg(options::OPT__HASH_HASH_HASH))
|
||||
return;
|
||||
|
||||
// Open script file and write the contents.
|
||||
std::error_code EC;
|
||||
llvm::raw_fd_ostream Lksf(LKS, EC, llvm::sys::fs::F_None);
|
||||
|
||||
if (EC) {
|
||||
C.getDriver().Diag(clang::diag::err_unable_to_make_temp) << EC.message();
|
||||
return;
|
||||
}
|
||||
|
||||
Lksf << LksBuffer;
|
||||
}
|
||||
|
||||
static bool addXRayRuntime(const ToolChain &TC, const ArgList &Args,
|
||||
ArgStringList &CmdArgs) {
|
||||
if (Args.hasFlag(options::OPT_fxray_instrument,
|
||||
|
|
|
@ -629,3 +629,43 @@
|
|||
// RUN: | FileCheck -check-prefix=CHK-FOPENMP-TARGET-NESTED-ERROR %s
|
||||
|
||||
// CHK-FOPENMP-TARGET-NESTED-ERROR: clang{{.*}} error: invalid -Xopenmp-target argument: '-Xopenmp-target -Xopenmp-target', options requiring arguments are unsupported
|
||||
|
||||
/// ###########################################################################
|
||||
|
||||
/// Check -Xopenmp-target uses one of the archs provided when several archs are used.
|
||||
// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_35 -Xopenmp-target -march=sm_60 %s 2>&1 \
|
||||
// RUN: | FileCheck -check-prefix=CHK-FOPENMP-TARGET-ARCHS %s
|
||||
|
||||
// CHK-FOPENMP-TARGET-ARCHS: ptxas{{.*}}" "--gpu-name" "sm_60"
|
||||
// CHK-FOPENMP-TARGET-ARCHS: nvlink{{.*}}" "-arch" "sm_60"
|
||||
|
||||
/// ###########################################################################
|
||||
|
||||
/// Check -Xopenmp-target -march=sm_35 works as expected when two triples are present.
|
||||
// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=powerpc64le-ibm-linux-gnu,nvptx64-nvidia-cuda -Xopenmp-target=nvptx64-nvidia-cuda -march=sm_35 %s 2>&1 \
|
||||
// RUN: | FileCheck -check-prefix=CHK-FOPENMP-TARGET-COMPILATION %s
|
||||
|
||||
// CHK-FOPENMP-TARGET-COMPILATION: ptxas{{.*}}" "--gpu-name" "sm_35"
|
||||
// CHK-FOPENMP-TARGET-COMPILATION: nvlink{{.*}}" "-arch" "sm_35"
|
||||
|
||||
/// ###########################################################################
|
||||
|
||||
/// Check cubin file generation and usage by nvlink
|
||||
// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -save-temps -no-canonical-prefixes %s 2>&1 \
|
||||
// RUN: | FileCheck -check-prefix=CHK-CUBIN %s
|
||||
|
||||
// CHK-CUBIN: clang{{.*}}" "-o" "{{.*}}-openmp-nvptx64-nvidia-cuda.s"
|
||||
// CHK-CUBIN-NEXT: ptxas{{.*}}" "--output-file" "{{.*}}-openmp-nvptx64-nvidia-cuda.cubin" "{{.*}}-openmp-nvptx64-nvidia-cuda.s"
|
||||
// CHK-CUBIN-NEXT: nvlink" "-o" "{{.*}}-openmp-nvptx64-nvidia-cuda" {{.*}} "openmp-offload-openmp-nvptx64-nvidia-cuda.cubin"
|
||||
|
||||
/// ###########################################################################
|
||||
|
||||
/// Check cubin file unbundling and usage by nvlink when two object files are given
|
||||
// RUN: touch %t1.o
|
||||
// RUN: touch %t2.o
|
||||
// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -save-temps -no-canonical-prefixes %t1.o %t2.o 2>&1 \
|
||||
// RUN: | FileCheck -check-prefix=CHK-TWOCUBIN %s
|
||||
|
||||
// CHK-TWOCUBIN: clang-offload-bundler" "-type=o" "{{.*}}inputs={{.*}}tmp1.o" "-outputs={{.*}}.o,{{.*}}tmp1-openmp-nvptx64-nvidia-cuda.cubin" "-unbundle"
|
||||
// CHK-TWOCUBIN-NEXT: clang-offload-bundler" "-type=o" "{{.*}}inputs={{.*}}tmp2.o" "-outputs={{.*}}.o,{{.*}}tmp2-openmp-nvptx64-nvidia-cuda.cubin" "-unbundle"
|
||||
// CHK-TWOCUBIN-NEXT: nvlink" "-o" "{{.*}}-openmp-nvptx64-nvidia-cuda" {{.*}} "openmp-offload.c.tmp1-openmp-nvptx64-nvidia-cuda.cubin" "openmp-offload.c.tmp2-openmp-nvptx64-nvidia-cuda.cubin"
|
||||
|
|
Loading…
Reference in New Issue