forked from OSchip/llvm-project
[OpenMP] Deprecate the old driver for OpenMP offloading
Recently OpenMP has transitioned to using the "new" driver which primarily merges the device and host linking phases into a single wrapper that handles both at the same time. This replaced a few tools that were only used for OpenMP offloading, such as the `clang-offload-wrapper` and `clang-nvlink-wrapper`. The new driver carries some marked benefits compared to the old driver that is now being deprecated. Things like device-side LTO, static library support, and more compatible tooling. As such, we should be able to completely deprecate the old driver, at least for OpenMP. The old driver support will still exist for CUDA and HIP, although both of these can currently be compiled on Linux with `--offload-new-driver` to use the new method. Note that this does not deprecate the `clang-offload-bundler`, although it is unused by OpenMP now, it is still used by the HIP toolchain both as their device binary format and object format. When I proposed deprecating this code I heard some vendors voice concernes about needing to update their code in their fork. They should be able to just revert this commit if it lands. Reviewed By: jdoerfert, MaskRay, ye-luo Differential Revision: https://reviews.llvm.org/D130020
This commit is contained in:
parent
eabfaced50
commit
47166968db
|
@ -73,7 +73,6 @@ public:
|
|||
VerifyPCHJobClass,
|
||||
OffloadBundlingJobClass,
|
||||
OffloadUnbundlingJobClass,
|
||||
OffloadWrapperJobClass,
|
||||
OffloadPackagerJobClass,
|
||||
LinkerWrapperJobClass,
|
||||
StaticLibJobClass,
|
||||
|
@ -659,17 +658,6 @@ public:
|
|||
}
|
||||
};
|
||||
|
||||
class OffloadWrapperJobAction : public JobAction {
|
||||
void anchor() override;
|
||||
|
||||
public:
|
||||
OffloadWrapperJobAction(ActionList &Inputs, types::ID Type);
|
||||
|
||||
static bool classof(const Action *A) {
|
||||
return A->getKind() == OffloadWrapperJobClass;
|
||||
}
|
||||
};
|
||||
|
||||
class OffloadPackagerJobAction : public JobAction {
|
||||
void anchor() override;
|
||||
|
||||
|
|
|
@ -150,7 +150,6 @@ private:
|
|||
mutable std::unique_ptr<Tool> StaticLibTool;
|
||||
mutable std::unique_ptr<Tool> IfsMerge;
|
||||
mutable std::unique_ptr<Tool> OffloadBundler;
|
||||
mutable std::unique_ptr<Tool> OffloadWrapper;
|
||||
mutable std::unique_ptr<Tool> OffloadPackager;
|
||||
mutable std::unique_ptr<Tool> LinkerWrapper;
|
||||
|
||||
|
@ -162,7 +161,6 @@ private:
|
|||
Tool *getIfsMerge() const;
|
||||
Tool *getClangAs() const;
|
||||
Tool *getOffloadBundler() const;
|
||||
Tool *getOffloadWrapper() const;
|
||||
Tool *getOffloadPackager() const;
|
||||
Tool *getLinkerWrapper() const;
|
||||
|
||||
|
|
|
@ -43,8 +43,6 @@ const char *Action::getClassName(ActionClass AC) {
|
|||
return "clang-offload-bundler";
|
||||
case OffloadUnbundlingJobClass:
|
||||
return "clang-offload-unbundler";
|
||||
case OffloadWrapperJobClass:
|
||||
return "clang-offload-wrapper";
|
||||
case OffloadPackagerJobClass:
|
||||
return "clang-offload-packager";
|
||||
case LinkerWrapperJobClass:
|
||||
|
@ -428,12 +426,6 @@ void OffloadUnbundlingJobAction::anchor() {}
|
|||
OffloadUnbundlingJobAction::OffloadUnbundlingJobAction(Action *Input)
|
||||
: JobAction(OffloadUnbundlingJobClass, Input, Input->getType()) {}
|
||||
|
||||
void OffloadWrapperJobAction::anchor() {}
|
||||
|
||||
OffloadWrapperJobAction::OffloadWrapperJobAction(ActionList &Inputs,
|
||||
types::ID Type)
|
||||
: JobAction(OffloadWrapperJobClass, Inputs, Type) {}
|
||||
|
||||
void OffloadPackagerJobAction::anchor() {}
|
||||
|
||||
OffloadPackagerJobAction::OffloadPackagerJobAction(ActionList &Inputs,
|
||||
|
|
|
@ -3441,178 +3441,6 @@ class OffloadingActionBuilder final {
|
|||
void appendLinkDependences(OffloadAction::DeviceDependences &DA) override {}
|
||||
};
|
||||
|
||||
/// OpenMP action builder. The host bitcode is passed to the device frontend
|
||||
/// and all the device linked images are passed to the host link phase.
|
||||
class OpenMPActionBuilder final : public DeviceActionBuilder {
|
||||
/// The OpenMP actions for the current input.
|
||||
ActionList OpenMPDeviceActions;
|
||||
|
||||
/// The linker inputs obtained for each toolchain.
|
||||
SmallVector<ActionList, 8> DeviceLinkerInputs;
|
||||
|
||||
public:
|
||||
OpenMPActionBuilder(Compilation &C, DerivedArgList &Args,
|
||||
const Driver::InputList &Inputs)
|
||||
: DeviceActionBuilder(C, Args, Inputs, Action::OFK_OpenMP) {}
|
||||
|
||||
ActionBuilderReturnCode
|
||||
getDeviceDependences(OffloadAction::DeviceDependences &DA,
|
||||
phases::ID CurPhase, phases::ID FinalPhase,
|
||||
PhasesTy &Phases) override {
|
||||
if (OpenMPDeviceActions.empty())
|
||||
return ABRT_Inactive;
|
||||
|
||||
// We should always have an action for each input.
|
||||
assert(OpenMPDeviceActions.size() == ToolChains.size() &&
|
||||
"Number of OpenMP actions and toolchains do not match.");
|
||||
|
||||
// The host only depends on device action in the linking phase, when all
|
||||
// the device images have to be embedded in the host image.
|
||||
if (CurPhase == phases::Link) {
|
||||
assert(ToolChains.size() == DeviceLinkerInputs.size() &&
|
||||
"Toolchains and linker inputs sizes do not match.");
|
||||
auto LI = DeviceLinkerInputs.begin();
|
||||
for (auto *A : OpenMPDeviceActions) {
|
||||
LI->push_back(A);
|
||||
++LI;
|
||||
}
|
||||
|
||||
// We passed the device action as a host dependence, so we don't need to
|
||||
// do anything else with them.
|
||||
OpenMPDeviceActions.clear();
|
||||
return ABRT_Success;
|
||||
}
|
||||
|
||||
// By default, we produce an action for each device arch.
|
||||
for (Action *&A : OpenMPDeviceActions)
|
||||
A = C.getDriver().ConstructPhaseAction(C, Args, CurPhase, A);
|
||||
|
||||
return ABRT_Success;
|
||||
}
|
||||
|
||||
ActionBuilderReturnCode addDeviceDepences(Action *HostAction) override {
|
||||
|
||||
// If this is an input action replicate it for each OpenMP toolchain.
|
||||
if (auto *IA = dyn_cast<InputAction>(HostAction)) {
|
||||
OpenMPDeviceActions.clear();
|
||||
for (unsigned I = 0; I < ToolChains.size(); ++I)
|
||||
OpenMPDeviceActions.push_back(
|
||||
C.MakeAction<InputAction>(IA->getInputArg(), IA->getType()));
|
||||
return ABRT_Success;
|
||||
}
|
||||
|
||||
// If this is an unbundling action use it as is for each OpenMP toolchain.
|
||||
if (auto *UA = dyn_cast<OffloadUnbundlingJobAction>(HostAction)) {
|
||||
OpenMPDeviceActions.clear();
|
||||
auto *IA = cast<InputAction>(UA->getInputs().back());
|
||||
std::string FileName = IA->getInputArg().getAsString(Args);
|
||||
// Check if the type of the file is the same as the action. Do not
|
||||
// unbundle it if it is not. Do not unbundle .so files, for example,
|
||||
// which are not object files.
|
||||
if (IA->getType() == types::TY_Object &&
|
||||
(!llvm::sys::path::has_extension(FileName) ||
|
||||
types::lookupTypeForExtension(
|
||||
llvm::sys::path::extension(FileName).drop_front()) !=
|
||||
types::TY_Object))
|
||||
return ABRT_Inactive;
|
||||
for (unsigned I = 0; I < ToolChains.size(); ++I) {
|
||||
OpenMPDeviceActions.push_back(UA);
|
||||
UA->registerDependentActionInfo(
|
||||
ToolChains[I], /*BoundArch=*/StringRef(), Action::OFK_OpenMP);
|
||||
}
|
||||
return ABRT_Success;
|
||||
}
|
||||
|
||||
// When generating code for OpenMP we use the host compile phase result as
|
||||
// a dependence to the device compile phase so that it can learn what
|
||||
// declarations should be emitted. However, this is not the only use for
|
||||
// the host action, so we prevent it from being collapsed.
|
||||
if (isa<CompileJobAction>(HostAction)) {
|
||||
HostAction->setCannotBeCollapsedWithNextDependentAction();
|
||||
assert(ToolChains.size() == OpenMPDeviceActions.size() &&
|
||||
"Toolchains and device action sizes do not match.");
|
||||
OffloadAction::HostDependence HDep(
|
||||
*HostAction, *C.getSingleOffloadToolChain<Action::OFK_Host>(),
|
||||
/*BoundArch=*/nullptr, Action::OFK_OpenMP);
|
||||
auto TC = ToolChains.begin();
|
||||
for (Action *&A : OpenMPDeviceActions) {
|
||||
assert(isa<CompileJobAction>(A));
|
||||
OffloadAction::DeviceDependences DDep;
|
||||
DDep.add(*A, **TC, /*BoundArch=*/nullptr, Action::OFK_OpenMP);
|
||||
A = C.MakeAction<OffloadAction>(HDep, DDep);
|
||||
++TC;
|
||||
}
|
||||
}
|
||||
return ABRT_Success;
|
||||
}
|
||||
|
||||
void appendTopLevelActions(ActionList &AL) override {
|
||||
if (OpenMPDeviceActions.empty())
|
||||
return;
|
||||
|
||||
// We should always have an action for each input.
|
||||
assert(OpenMPDeviceActions.size() == ToolChains.size() &&
|
||||
"Number of OpenMP actions and toolchains do not match.");
|
||||
|
||||
// Append all device actions followed by the proper offload action.
|
||||
auto TI = ToolChains.begin();
|
||||
for (auto *A : OpenMPDeviceActions) {
|
||||
OffloadAction::DeviceDependences Dep;
|
||||
Dep.add(*A, **TI, /*BoundArch=*/nullptr, Action::OFK_OpenMP);
|
||||
AL.push_back(C.MakeAction<OffloadAction>(Dep, A->getType()));
|
||||
++TI;
|
||||
}
|
||||
// We no longer need the action stored in this builder.
|
||||
OpenMPDeviceActions.clear();
|
||||
}
|
||||
|
||||
void appendLinkDeviceActions(ActionList &AL) override {
|
||||
assert(ToolChains.size() == DeviceLinkerInputs.size() &&
|
||||
"Toolchains and linker inputs sizes do not match.");
|
||||
|
||||
// Append a new link action for each device.
|
||||
auto TC = ToolChains.begin();
|
||||
for (auto &LI : DeviceLinkerInputs) {
|
||||
auto *DeviceLinkAction =
|
||||
C.MakeAction<LinkJobAction>(LI, types::TY_Image);
|
||||
OffloadAction::DeviceDependences DeviceLinkDeps;
|
||||
DeviceLinkDeps.add(*DeviceLinkAction, **TC, /*BoundArch=*/nullptr,
|
||||
Action::OFK_OpenMP);
|
||||
AL.push_back(C.MakeAction<OffloadAction>(DeviceLinkDeps,
|
||||
DeviceLinkAction->getType()));
|
||||
++TC;
|
||||
}
|
||||
DeviceLinkerInputs.clear();
|
||||
}
|
||||
|
||||
Action* appendLinkHostActions(ActionList &AL) override {
|
||||
// Create wrapper bitcode from the result of device link actions and compile
|
||||
// it to an object which will be added to the host link command.
|
||||
auto *BC = C.MakeAction<OffloadWrapperJobAction>(AL, types::TY_LLVM_BC);
|
||||
auto *ASM = C.MakeAction<BackendJobAction>(BC, types::TY_PP_Asm);
|
||||
return C.MakeAction<AssembleJobAction>(ASM, types::TY_Object);
|
||||
}
|
||||
|
||||
void appendLinkDependences(OffloadAction::DeviceDependences &DA) override {}
|
||||
|
||||
bool initialize() override {
|
||||
// Get the OpenMP toolchains. If we don't get any, the action builder will
|
||||
// know there is nothing to do related to OpenMP offloading.
|
||||
auto OpenMPTCRange = C.getOffloadToolChains<Action::OFK_OpenMP>();
|
||||
for (auto TI = OpenMPTCRange.first, TE = OpenMPTCRange.second; TI != TE;
|
||||
++TI)
|
||||
ToolChains.push_back(TI->second);
|
||||
|
||||
DeviceLinkerInputs.resize(ToolChains.size());
|
||||
return false;
|
||||
}
|
||||
|
||||
bool canUseBundlerUnbundler() const override {
|
||||
// OpenMP should use bundled files whenever possible.
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
///
|
||||
/// TODO: Add the implementation for other specialized builders here.
|
||||
///
|
||||
|
@ -3637,9 +3465,6 @@ public:
|
|||
// Create a specialized builder for HIP.
|
||||
SpecializedBuilders.push_back(new HIPActionBuilder(C, Args, Inputs));
|
||||
|
||||
// Create a specialized builder for OpenMP.
|
||||
SpecializedBuilders.push_back(new OpenMPActionBuilder(C, Args, Inputs));
|
||||
|
||||
//
|
||||
// TODO: Build other specialized builders here.
|
||||
//
|
||||
|
@ -5505,14 +5330,6 @@ InputInfoList Driver::BuildJobsForActionNoCache(
|
|||
/*CreatePrefixForHost=*/isa<OffloadPackagerJobAction>(A) ||
|
||||
!(A->getOffloadingHostActiveKinds() == Action::OFK_None ||
|
||||
AtTopLevel));
|
||||
if (isa<OffloadWrapperJobAction>(JA)) {
|
||||
if (Arg *FinalOutput = C.getArgs().getLastArg(options::OPT_o))
|
||||
BaseInput = FinalOutput->getValue();
|
||||
else
|
||||
BaseInput = getDefaultImageName();
|
||||
BaseInput =
|
||||
C.getArgs().MakeArgString(std::string(BaseInput) + "-wrapper");
|
||||
}
|
||||
Result = InputInfo(A, GetNamedOutputPath(C, *JA, BaseInput, BoundArch,
|
||||
AtTopLevel, MultipleArchs,
|
||||
OffloadingPrefix),
|
||||
|
|
|
@ -351,12 +351,6 @@ Tool *ToolChain::getOffloadBundler() const {
|
|||
return OffloadBundler.get();
|
||||
}
|
||||
|
||||
Tool *ToolChain::getOffloadWrapper() const {
|
||||
if (!OffloadWrapper)
|
||||
OffloadWrapper.reset(new tools::OffloadWrapper(*this));
|
||||
return OffloadWrapper.get();
|
||||
}
|
||||
|
||||
Tool *ToolChain::getOffloadPackager() const {
|
||||
if (!OffloadPackager)
|
||||
OffloadPackager.reset(new tools::OffloadPackager(*this));
|
||||
|
@ -406,8 +400,6 @@ Tool *ToolChain::getTool(Action::ActionClass AC) const {
|
|||
case Action::OffloadUnbundlingJobClass:
|
||||
return getOffloadBundler();
|
||||
|
||||
case Action::OffloadWrapperJobClass:
|
||||
return getOffloadWrapper();
|
||||
case Action::OffloadPackagerJobClass:
|
||||
return getOffloadPackager();
|
||||
case Action::LinkerWrapperJobClass:
|
||||
|
|
|
@ -31,48 +31,6 @@ using namespace llvm::opt;
|
|||
|
||||
namespace {
|
||||
|
||||
static const char *getOutputFileName(Compilation &C, StringRef Base,
|
||||
const char *Postfix,
|
||||
const char *Extension) {
|
||||
const char *OutputFileName;
|
||||
if (C.getDriver().isSaveTempsEnabled()) {
|
||||
OutputFileName =
|
||||
C.getArgs().MakeArgString(Base.str() + Postfix + "." + Extension);
|
||||
} else {
|
||||
std::string TmpName =
|
||||
C.getDriver().GetTemporaryPath(Base.str() + Postfix, Extension);
|
||||
OutputFileName = C.addTempFile(C.getArgs().MakeArgString(TmpName));
|
||||
}
|
||||
return OutputFileName;
|
||||
}
|
||||
|
||||
static void addLLCOptArg(const llvm::opt::ArgList &Args,
|
||||
llvm::opt::ArgStringList &CmdArgs) {
|
||||
if (Arg *A = Args.getLastArg(options::OPT_O_Group)) {
|
||||
StringRef OOpt = "0";
|
||||
if (A->getOption().matches(options::OPT_O4) ||
|
||||
A->getOption().matches(options::OPT_Ofast))
|
||||
OOpt = "3";
|
||||
else if (A->getOption().matches(options::OPT_O0))
|
||||
OOpt = "0";
|
||||
else if (A->getOption().matches(options::OPT_O)) {
|
||||
// Clang and opt support -Os/-Oz; llc only supports -O0, -O1, -O2 and -O3
|
||||
// so we map -Os/-Oz to -O2.
|
||||
// Only clang supports -Og, and maps it to -O1.
|
||||
// We map anything else to -O2.
|
||||
OOpt = llvm::StringSwitch<const char *>(A->getValue())
|
||||
.Case("1", "1")
|
||||
.Case("2", "2")
|
||||
.Case("3", "3")
|
||||
.Case("s", "2")
|
||||
.Case("z", "2")
|
||||
.Case("g", "1")
|
||||
.Default("0");
|
||||
}
|
||||
CmdArgs.push_back(Args.MakeArgString("-O" + OOpt));
|
||||
}
|
||||
}
|
||||
|
||||
static bool checkSystemForAMDGPU(const ArgList &Args, const AMDGPUToolChain &TC,
|
||||
std::string &GPUArch) {
|
||||
if (auto Err = TC.getSystemGPUArch(Args, GPUArch)) {
|
||||
|
@ -86,173 +44,6 @@ static bool checkSystemForAMDGPU(const ArgList &Args, const AMDGPUToolChain &TC,
|
|||
}
|
||||
} // namespace
|
||||
|
||||
const char *AMDGCN::OpenMPLinker::constructLLVMLinkCommand(
|
||||
const toolchains::AMDGPUOpenMPToolChain &AMDGPUOpenMPTC, Compilation &C,
|
||||
const JobAction &JA, const InputInfoList &Inputs, const ArgList &Args,
|
||||
StringRef SubArchName, StringRef OutputFilePrefix) const {
|
||||
ArgStringList CmdArgs;
|
||||
|
||||
for (const auto &II : Inputs)
|
||||
if (II.isFilename())
|
||||
CmdArgs.push_back(II.getFilename());
|
||||
|
||||
bool HasLibm = false;
|
||||
if (Args.hasArg(options::OPT_l)) {
|
||||
auto Lm = Args.getAllArgValues(options::OPT_l);
|
||||
for (auto &Lib : Lm) {
|
||||
if (Lib == "m") {
|
||||
HasLibm = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (HasLibm) {
|
||||
// This is not certain to work. The device libs added here, and passed to
|
||||
// llvm-link, are missing attributes that they expect to be inserted when
|
||||
// passed to mlink-builtin-bitcode. The amdgpu backend does not generate
|
||||
// conservatively correct code when attributes are missing, so this may
|
||||
// be the root cause of miscompilations. Passing via mlink-builtin-bitcode
|
||||
// ultimately hits CodeGenModule::addDefaultFunctionDefinitionAttributes
|
||||
// on each function, see D28538 for context.
|
||||
// Potential workarounds:
|
||||
// - unconditionally link all of the device libs to every translation
|
||||
// unit in clang via mlink-builtin-bitcode
|
||||
// - build a libm bitcode file as part of the DeviceRTL and explictly
|
||||
// mlink-builtin-bitcode the rocm device libs components at build time
|
||||
// - drop this llvm-link fork in favour or some calls into LLVM, chosen
|
||||
// to do basically the same work as llvm-link but with that call first
|
||||
// - write an opt pass that sets that on every function it sees and pipe
|
||||
// the device-libs bitcode through that on the way to this llvm-link
|
||||
SmallVector<std::string, 12> BCLibs =
|
||||
AMDGPUOpenMPTC.getCommonDeviceLibNames(Args, SubArchName.str());
|
||||
for (StringRef BCFile : BCLibs)
|
||||
CmdArgs.push_back(Args.MakeArgString(BCFile));
|
||||
}
|
||||
}
|
||||
|
||||
AddStaticDeviceLibsLinking(C, *this, JA, Inputs, Args, CmdArgs, "amdgcn",
|
||||
SubArchName, /*isBitCodeSDL=*/true,
|
||||
/*postClangLink=*/false);
|
||||
// Add an intermediate output file.
|
||||
CmdArgs.push_back("-o");
|
||||
const char *OutputFileName =
|
||||
getOutputFileName(C, OutputFilePrefix, "-linked", "bc");
|
||||
CmdArgs.push_back(OutputFileName);
|
||||
const char *Exec =
|
||||
Args.MakeArgString(getToolChain().GetProgramPath("llvm-link"));
|
||||
C.addCommand(std::make_unique<Command>(
|
||||
JA, *this, ResponseFileSupport::AtFileCurCP(), Exec, CmdArgs, Inputs,
|
||||
InputInfo(&JA, Args.MakeArgString(OutputFileName))));
|
||||
|
||||
// If we linked in libm definitions late we run another round of optimizations
|
||||
// to inline the definitions and fold what is foldable.
|
||||
if (HasLibm) {
|
||||
ArgStringList OptCmdArgs;
|
||||
const char *OptOutputFileName =
|
||||
getOutputFileName(C, OutputFilePrefix, "-linked-opt", "bc");
|
||||
addLLCOptArg(Args, OptCmdArgs);
|
||||
OptCmdArgs.push_back(OutputFileName);
|
||||
OptCmdArgs.push_back("-o");
|
||||
OptCmdArgs.push_back(OptOutputFileName);
|
||||
const char *OptExec =
|
||||
Args.MakeArgString(getToolChain().GetProgramPath("opt"));
|
||||
C.addCommand(std::make_unique<Command>(
|
||||
JA, *this, ResponseFileSupport::AtFileCurCP(), OptExec, OptCmdArgs,
|
||||
InputInfo(&JA, Args.MakeArgString(OutputFileName)),
|
||||
InputInfo(&JA, Args.MakeArgString(OptOutputFileName))));
|
||||
OutputFileName = OptOutputFileName;
|
||||
}
|
||||
|
||||
return OutputFileName;
|
||||
}
|
||||
|
||||
const char *AMDGCN::OpenMPLinker::constructLlcCommand(
|
||||
Compilation &C, const JobAction &JA, const InputInfoList &Inputs,
|
||||
const llvm::opt::ArgList &Args, llvm::StringRef SubArchName,
|
||||
llvm::StringRef OutputFilePrefix, const char *InputFileName,
|
||||
bool OutputIsAsm) const {
|
||||
// Construct llc command.
|
||||
ArgStringList LlcArgs;
|
||||
// The input to llc is the output from opt.
|
||||
LlcArgs.push_back(InputFileName);
|
||||
// Pass optimization arg to llc.
|
||||
addLLCOptArg(Args, LlcArgs);
|
||||
LlcArgs.push_back("-mtriple=amdgcn-amd-amdhsa");
|
||||
LlcArgs.push_back(Args.MakeArgString("-mcpu=" + SubArchName));
|
||||
LlcArgs.push_back(
|
||||
Args.MakeArgString(Twine("-filetype=") + (OutputIsAsm ? "asm" : "obj")));
|
||||
|
||||
for (const Arg *A : Args.filtered(options::OPT_mllvm)) {
|
||||
LlcArgs.push_back(A->getValue(0));
|
||||
}
|
||||
|
||||
// Add output filename
|
||||
LlcArgs.push_back("-o");
|
||||
const char *LlcOutputFile =
|
||||
getOutputFileName(C, OutputFilePrefix, "", OutputIsAsm ? "s" : "o");
|
||||
LlcArgs.push_back(LlcOutputFile);
|
||||
const char *Llc = Args.MakeArgString(getToolChain().GetProgramPath("llc"));
|
||||
C.addCommand(std::make_unique<Command>(
|
||||
JA, *this, ResponseFileSupport::AtFileCurCP(), Llc, LlcArgs, Inputs,
|
||||
InputInfo(&JA, Args.MakeArgString(LlcOutputFile))));
|
||||
return LlcOutputFile;
|
||||
}
|
||||
|
||||
void AMDGCN::OpenMPLinker::constructLldCommand(
|
||||
Compilation &C, const JobAction &JA, const InputInfoList &Inputs,
|
||||
const InputInfo &Output, const llvm::opt::ArgList &Args,
|
||||
const char *InputFileName) const {
|
||||
// Construct lld command.
|
||||
// The output from ld.lld is an HSA code object file.
|
||||
ArgStringList LldArgs{"-flavor", "gnu", "--no-undefined",
|
||||
"-shared", "-o", Output.getFilename(),
|
||||
InputFileName};
|
||||
|
||||
const char *Lld = Args.MakeArgString(getToolChain().GetProgramPath("lld"));
|
||||
C.addCommand(std::make_unique<Command>(
|
||||
JA, *this, ResponseFileSupport::AtFileCurCP(), Lld, LldArgs, Inputs,
|
||||
InputInfo(&JA, Args.MakeArgString(Output.getFilename()))));
|
||||
}
|
||||
|
||||
// For amdgcn the inputs of the linker job are device bitcode and output is
|
||||
// object file. It calls llvm-link, opt, llc, then lld steps.
|
||||
void AMDGCN::OpenMPLinker::ConstructJob(Compilation &C, const JobAction &JA,
|
||||
const InputInfo &Output,
|
||||
const InputInfoList &Inputs,
|
||||
const ArgList &Args,
|
||||
const char *LinkingOutput) const {
|
||||
const ToolChain &TC = getToolChain();
|
||||
assert(getToolChain().getTriple().isAMDGCN() && "Unsupported target");
|
||||
|
||||
const toolchains::AMDGPUOpenMPToolChain &AMDGPUOpenMPTC =
|
||||
static_cast<const toolchains::AMDGPUOpenMPToolChain &>(TC);
|
||||
|
||||
std::string GPUArch = Args.getLastArgValue(options::OPT_march_EQ).str();
|
||||
if (GPUArch.empty()) {
|
||||
if (!checkSystemForAMDGPU(Args, AMDGPUOpenMPTC, GPUArch))
|
||||
return;
|
||||
}
|
||||
|
||||
// Prefix for temporary file name.
|
||||
std::string Prefix;
|
||||
for (const auto &II : Inputs)
|
||||
if (II.isFilename())
|
||||
Prefix = llvm::sys::path::stem(II.getFilename()).str() + "-" + GPUArch;
|
||||
assert(Prefix.length() && "no linker inputs are files ");
|
||||
|
||||
// Each command outputs different files.
|
||||
const char *LLVMLinkCommand = constructLLVMLinkCommand(
|
||||
AMDGPUOpenMPTC, C, JA, Inputs, Args, GPUArch, Prefix);
|
||||
|
||||
// Produce readable assembly if save-temps is enabled.
|
||||
if (C.getDriver().isSaveTempsEnabled())
|
||||
constructLlcCommand(C, JA, Inputs, Args, GPUArch, Prefix, LLVMLinkCommand,
|
||||
/*OutputIsAsm=*/true);
|
||||
const char *LlcCommand = constructLlcCommand(C, JA, Inputs, Args, GPUArch,
|
||||
Prefix, LLVMLinkCommand);
|
||||
constructLldCommand(C, JA, Inputs, Output, Args, LlcCommand);
|
||||
}
|
||||
|
||||
AMDGPUOpenMPToolChain::AMDGPUOpenMPToolChain(const Driver &D,
|
||||
const llvm::Triple &Triple,
|
||||
const ToolChain &HostTC,
|
||||
|
@ -329,11 +120,6 @@ llvm::opt::DerivedArgList *AMDGPUOpenMPToolChain::TranslateArgs(
|
|||
return DAL;
|
||||
}
|
||||
|
||||
Tool *AMDGPUOpenMPToolChain::buildLinker() const {
|
||||
assert(getTriple().isAMDGCN());
|
||||
return new tools::AMDGCN::OpenMPLinker(*this);
|
||||
}
|
||||
|
||||
void AMDGPUOpenMPToolChain::addClangWarningOptions(
|
||||
ArgStringList &CC1Args) const {
|
||||
HostTC.addClangWarningOptions(CC1Args);
|
||||
|
|
|
@ -20,49 +20,6 @@ namespace toolchains {
|
|||
class AMDGPUOpenMPToolChain;
|
||||
}
|
||||
|
||||
namespace tools {
|
||||
|
||||
namespace AMDGCN {
|
||||
// Runs llvm-link/opt/llc/lld, which links multiple LLVM bitcode, together with
|
||||
// device library, then compiles it to ISA in a shared object.
|
||||
class LLVM_LIBRARY_VISIBILITY OpenMPLinker : public Tool {
|
||||
public:
|
||||
OpenMPLinker(const ToolChain &TC)
|
||||
: Tool("AMDGCN::OpenMPLinker", "amdgcn-link", TC) {}
|
||||
|
||||
bool hasIntegratedCPP() const override { return false; }
|
||||
|
||||
void ConstructJob(Compilation &C, const JobAction &JA,
|
||||
const InputInfo &Output, const InputInfoList &Inputs,
|
||||
const llvm::opt::ArgList &TCArgs,
|
||||
const char *LinkingOutput) const override;
|
||||
|
||||
private:
|
||||
/// \return llvm-link output file name.
|
||||
const char *constructLLVMLinkCommand(
|
||||
const toolchains::AMDGPUOpenMPToolChain &AMDGPUOpenMPTC, Compilation &C,
|
||||
const JobAction &JA, const InputInfoList &Inputs,
|
||||
const llvm::opt::ArgList &Args, llvm::StringRef SubArchName,
|
||||
llvm::StringRef OutputFilePrefix) const;
|
||||
|
||||
/// \return llc output file name.
|
||||
const char *constructLlcCommand(Compilation &C, const JobAction &JA,
|
||||
const InputInfoList &Inputs,
|
||||
const llvm::opt::ArgList &Args,
|
||||
llvm::StringRef SubArchName,
|
||||
llvm::StringRef OutputFilePrefix,
|
||||
const char *InputFileName,
|
||||
bool OutputIsAsm = false) const;
|
||||
|
||||
void constructLldCommand(Compilation &C, const JobAction &JA,
|
||||
const InputInfoList &Inputs, const InputInfo &Output,
|
||||
const llvm::opt::ArgList &Args,
|
||||
const char *InputFileName) const;
|
||||
};
|
||||
|
||||
} // end namespace AMDGCN
|
||||
} // end namespace tools
|
||||
|
||||
namespace toolchains {
|
||||
|
||||
class LLVM_LIBRARY_VISIBILITY AMDGPUOpenMPToolChain final
|
||||
|
@ -98,9 +55,6 @@ public:
|
|||
const llvm::opt::ArgList &Args) const override;
|
||||
|
||||
const ToolChain &HostTC;
|
||||
|
||||
protected:
|
||||
Tool *buildLinker() const override;
|
||||
};
|
||||
|
||||
} // end namespace toolchains
|
||||
|
|
|
@ -8303,36 +8303,6 @@ void OffloadBundler::ConstructJobMultipleOutputs(
|
|||
CmdArgs, None, Outputs));
|
||||
}
|
||||
|
||||
void OffloadWrapper::ConstructJob(Compilation &C, const JobAction &JA,
|
||||
const InputInfo &Output,
|
||||
const InputInfoList &Inputs,
|
||||
const ArgList &Args,
|
||||
const char *LinkingOutput) const {
|
||||
ArgStringList CmdArgs;
|
||||
|
||||
const llvm::Triple &Triple = getToolChain().getEffectiveTriple();
|
||||
|
||||
// Add the "effective" target triple.
|
||||
CmdArgs.push_back("-target");
|
||||
CmdArgs.push_back(Args.MakeArgString(Triple.getTriple()));
|
||||
|
||||
// Add the output file name.
|
||||
assert(Output.isFilename() && "Invalid output.");
|
||||
CmdArgs.push_back("-o");
|
||||
CmdArgs.push_back(Output.getFilename());
|
||||
|
||||
// Add inputs.
|
||||
for (const InputInfo &I : Inputs) {
|
||||
assert(I.isFilename() && "Invalid input.");
|
||||
CmdArgs.push_back(I.getFilename());
|
||||
}
|
||||
|
||||
C.addCommand(std::make_unique<Command>(
|
||||
JA, *this, ResponseFileSupport::None(),
|
||||
Args.MakeArgString(getToolChain().GetProgramPath(getShortName())),
|
||||
CmdArgs, Inputs, Output));
|
||||
}
|
||||
|
||||
void OffloadPackager::ConstructJob(Compilation &C, const JobAction &JA,
|
||||
const InputInfo &Output,
|
||||
const InputInfoList &Inputs,
|
||||
|
|
|
@ -552,88 +552,6 @@ void NVPTX::Linker::ConstructJob(Compilation &C, const JobAction &JA,
|
|||
Exec, CmdArgs, Inputs, Output));
|
||||
}
|
||||
|
||||
void NVPTX::OpenMPLinker::ConstructJob(Compilation &C, const JobAction &JA,
|
||||
const InputInfo &Output,
|
||||
const InputInfoList &Inputs,
|
||||
const ArgList &Args,
|
||||
const char *LinkingOutput) const {
|
||||
const auto &TC =
|
||||
static_cast<const toolchains::CudaToolChain &>(getToolChain());
|
||||
assert(TC.getTriple().isNVPTX() && "Wrong platform");
|
||||
|
||||
ArgStringList CmdArgs;
|
||||
|
||||
// OpenMP uses nvlink to link cubin files. The result will be embedded in the
|
||||
// host binary by the host linker.
|
||||
assert(!JA.isHostOffloading(Action::OFK_OpenMP) &&
|
||||
"CUDA toolchain not expected for an OpenMP host device.");
|
||||
|
||||
if (Output.isFilename()) {
|
||||
CmdArgs.push_back("-o");
|
||||
CmdArgs.push_back(Output.getFilename());
|
||||
} else
|
||||
assert(Output.isNothing() && "Invalid output.");
|
||||
if (mustEmitDebugInfo(Args) == EmitSameDebugInfoAsHost)
|
||||
CmdArgs.push_back("-g");
|
||||
|
||||
if (Args.hasArg(options::OPT_v))
|
||||
CmdArgs.push_back("-v");
|
||||
|
||||
StringRef GPUArch =
|
||||
Args.getLastArgValue(options::OPT_march_EQ);
|
||||
assert(!GPUArch.empty() && "At least one GPU Arch required for ptxas.");
|
||||
|
||||
CmdArgs.push_back("-arch");
|
||||
CmdArgs.push_back(Args.MakeArgString(GPUArch));
|
||||
|
||||
// Add paths specified in LIBRARY_PATH environment variable as -L options.
|
||||
addDirectoryList(Args, CmdArgs, "-L", "LIBRARY_PATH");
|
||||
|
||||
// Add paths for the default clang library path.
|
||||
SmallString<256> DefaultLibPath =
|
||||
llvm::sys::path::parent_path(TC.getDriver().Dir);
|
||||
llvm::sys::path::append(DefaultLibPath, CLANG_INSTALL_LIBDIR_BASENAME);
|
||||
CmdArgs.push_back(Args.MakeArgString(Twine("-L") + DefaultLibPath));
|
||||
|
||||
for (const auto &II : Inputs) {
|
||||
if (II.getType() == types::TY_LLVM_IR ||
|
||||
II.getType() == types::TY_LTO_IR ||
|
||||
II.getType() == types::TY_LTO_BC ||
|
||||
II.getType() == types::TY_LLVM_BC) {
|
||||
C.getDriver().Diag(diag::err_drv_no_linker_llvm_support)
|
||||
<< getToolChain().getTripleString();
|
||||
continue;
|
||||
}
|
||||
|
||||
// Currently, we only pass the input files to the linker, we do not pass
|
||||
// any libraries that may be valid only for the host.
|
||||
if (!II.isFilename())
|
||||
continue;
|
||||
|
||||
const char *CubinF =
|
||||
C.getArgs().MakeArgString(getToolChain().getInputFilename(II));
|
||||
|
||||
CmdArgs.push_back(CubinF);
|
||||
}
|
||||
|
||||
AddStaticDeviceLibsLinking(C, *this, JA, Inputs, Args, CmdArgs, "nvptx",
|
||||
GPUArch, /*isBitCodeSDL=*/false,
|
||||
/*postClangLink=*/false);
|
||||
|
||||
// Find nvlink and pass it as "--nvlink-path=" argument of
|
||||
// clang-nvlink-wrapper.
|
||||
CmdArgs.push_back(Args.MakeArgString(
|
||||
Twine("--nvlink-path=" + getToolChain().GetProgramPath("nvlink"))));
|
||||
|
||||
const char *Exec =
|
||||
Args.MakeArgString(getToolChain().GetProgramPath("clang-nvlink-wrapper"));
|
||||
C.addCommand(std::make_unique<Command>(
|
||||
JA, *this,
|
||||
ResponseFileSupport{ResponseFileSupport::RF_Full, llvm::sys::WEM_UTF8,
|
||||
"--options-file"},
|
||||
Exec, CmdArgs, Inputs, Output));
|
||||
}
|
||||
|
||||
void NVPTX::getNVPTXTargetFeatures(const Driver &D, const llvm::Triple &Triple,
|
||||
const llvm::opt::ArgList &Args,
|
||||
std::vector<StringRef> &Features) {
|
||||
|
@ -766,9 +684,6 @@ void CudaToolChain::addClangTargetOptions(
|
|||
|
||||
addOpenMPDeviceRTL(getDriver(), DriverArgs, CC1Args, GpuArch.str(),
|
||||
getTriple());
|
||||
AddStaticDeviceLibsPostLinking(getDriver(), DriverArgs, CC1Args, "nvptx",
|
||||
GpuArch, /*isBitCodeSDL=*/true,
|
||||
/*postClangLink=*/true);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -868,8 +783,6 @@ Tool *CudaToolChain::buildAssembler() const {
|
|||
}
|
||||
|
||||
Tool *CudaToolChain::buildLinker() const {
|
||||
if (OK == Action::OFK_OpenMP)
|
||||
return new tools::NVPTX::OpenMPLinker(*this);
|
||||
return new tools::NVPTX::Linker(*this);
|
||||
}
|
||||
|
||||
|
|
|
@ -111,19 +111,6 @@ class LLVM_LIBRARY_VISIBILITY Linker : public Tool {
|
|||
const char *LinkingOutput) const override;
|
||||
};
|
||||
|
||||
class LLVM_LIBRARY_VISIBILITY OpenMPLinker : public Tool {
|
||||
public:
|
||||
OpenMPLinker(const ToolChain &TC)
|
||||
: Tool("NVPTX::OpenMPLinker", "nvlink", TC) {}
|
||||
|
||||
bool hasIntegratedCPP() const override { return false; }
|
||||
|
||||
void ConstructJob(Compilation &C, const JobAction &JA,
|
||||
const InputInfo &Output, const InputInfoList &Inputs,
|
||||
const llvm::opt::ArgList &TCArgs,
|
||||
const char *LinkingOutput) const override;
|
||||
};
|
||||
|
||||
void getNVPTXTargetFeatures(const Driver &D, const llvm::Triple &Triple,
|
||||
const llvm::opt::ArgList &Args,
|
||||
std::vector<StringRef> &Features);
|
||||
|
|
|
@ -1,24 +0,0 @@
|
|||
// REQUIRES: system-linux
|
||||
// REQUIRES: x86-registered-target
|
||||
// REQUIRES: amdgpu-registered-target
|
||||
// REQUIRES: shell
|
||||
|
||||
// RUN: mkdir -p %t
|
||||
// RUN: rm -f %t/amdgpu_arch_gfx906
|
||||
// RUN: cp %S/Inputs/amdgpu-arch/amdgpu_arch_gfx906 %t/
|
||||
// RUN: cp %S/Inputs/amdgpu-arch/amdgpu_arch_gfx908_gfx908 %t/
|
||||
// RUN: chmod +x %t/amdgpu_arch_gfx906
|
||||
// RUN: chmod +x %t/amdgpu_arch_gfx908_gfx908
|
||||
|
||||
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fopenmp -fno-openmp-new-driver -fopenmp-targets=amdgcn-amd-amdhsa -nogpulib --amdgpu-arch-tool=%t/amdgpu_arch_gfx906 %s 2>&1 \
|
||||
// RUN: | FileCheck %s
|
||||
// CHECK: "-cc1"{{.*}}"-triple" "amdgcn-amd-amdhsa"{{.*}}"-target-cpu" "[[GFX:gfx906]]"
|
||||
// CHECK: llvm-link{{.*}}"-o" "{{.*}}amdgpu-openmp-system-arch-{{.*}}-[[GFX]]-linked-{{.*}}.bc"
|
||||
// CHECK: llc{{.*}}amdgpu-openmp-system-arch-{{.*}}-[[GFX]]-linked-{{.*}}.bc" "-mtriple=amdgcn-amd-amdhsa" "-mcpu=[[GFX]]" "-filetype=obj" "-o"{{.*}}amdgpu-openmp-system-arch-{{.*}}-[[GFX]]-{{.*}}.o"
|
||||
|
||||
// case when amdgpu_arch returns multiple gpus but of same arch
|
||||
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fopenmp -fno-openmp-new-driver -fopenmp-targets=amdgcn-amd-amdhsa -nogpulib --amdgpu-arch-tool=%t/amdgpu_arch_gfx908_gfx908 %s 2>&1 \
|
||||
// RUN: | FileCheck %s --check-prefix=CHECK-MULTIPLE
|
||||
// CHECK-MULTIPLE: "-cc1"{{.*}}"-triple" "amdgcn-amd-amdhsa"{{.*}}"-target-cpu" "[[GFX:gfx908]]"
|
||||
// CHECK-MULTIPLE: llvm-link{{.*}}"-o" "{{.*}}amdgpu-openmp-system-arch-{{.*}}-[[GFX]]-linked-{{.*}}.bc"
|
||||
// CHECK-MULTIPLE: llc{{.*}}amdgpu-openmp-system-arch-{{.*}}-[[GFX]]-linked-{{.*}}.bc" "-mtriple=amdgcn-amd-amdhsa" "-mcpu=[[GFX]]" "-filetype=obj" "-o"{{.*}}amdgpu-openmp-system-arch-{{.*}}-[[GFX]]-{{.*}}.o"
|
|
@ -1,53 +0,0 @@
|
|||
// REQUIRES: x86-registered-target
|
||||
// REQUIRES: amdgpu-registered-target
|
||||
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa \
|
||||
// RUN: -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx906 --libomptarget-amdgpu-bc-path=%S/Inputs/hip_dev_lib %s 2>&1 \
|
||||
// RUN: | FileCheck %s
|
||||
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa \
|
||||
// RUN: --offload-arch=gfx906 --libomptarget-amdgpu-bc-path=%S/Inputs/hip_dev_lib %s 2>&1 \
|
||||
// RUN: | FileCheck %s
|
||||
|
||||
// verify the tools invocations
|
||||
// CHECK: "-cc1" "-triple" "x86_64-unknown-linux-gnu"{{.*}}"-emit-llvm-bc"{{.*}}"-x" "c"
|
||||
// CHECK: "-cc1" "-triple" "amdgcn-amd-amdhsa" "-aux-triple" "x86_64-unknown-linux-gnu"{{.*}}"-target-cpu" "gfx906"{{.*}}"-fcuda-is-device"{{.*}}"-mlink-builtin-bitcode" "{{.*}}libomptarget-amdgpu-gfx906.bc"
|
||||
// CHECK: "-cc1" "-triple" "x86_64-unknown-linux-gnu"{{.*}}"-emit-obj"
|
||||
// CHECK: clang-linker-wrapper{{.*}}"--"{{.*}} "-o" "a.out"
|
||||
|
||||
// RUN: %clang -ccc-print-phases --target=x86_64-unknown-linux-gnu -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx906 %s 2>&1 \
|
||||
// RUN: | FileCheck --check-prefix=CHECK-PHASES %s
|
||||
// CHECK-PHASES: 0: input, "[[INPUT:.+]]", c, (host-openmp)
|
||||
// CHECK-PHASES: 1: preprocessor, {0}, cpp-output, (host-openmp)
|
||||
// CHECK-PHASES: 2: compiler, {1}, ir, (host-openmp)
|
||||
// CHECK-PHASES: 3: input, "[[INPUT]]", c, (device-openmp)
|
||||
// CHECK-PHASES: 4: preprocessor, {3}, cpp-output, (device-openmp)
|
||||
// CHECK-PHASES: 5: compiler, {4}, ir, (device-openmp)
|
||||
// CHECK-PHASES: 6: offload, "host-openmp (x86_64-unknown-linux-gnu)" {2}, "device-openmp (amdgcn-amd-amdhsa)" {5}, ir
|
||||
// CHECK-PHASES: 7: backend, {6}, assembler, (device-openmp)
|
||||
// CHECK-PHASES: 8: assembler, {7}, object, (device-openmp)
|
||||
// CHECK-PHASES: 9: offload, "device-openmp (amdgcn-amd-amdhsa)" {8}, object
|
||||
// CHECK-PHASES: 10: clang-offload-packager, {9}, image
|
||||
// CHECK-PHASES: 11: offload, "host-openmp (x86_64-unknown-linux-gnu)" {2}, " (x86_64-unknown-linux-gnu)" {10}, ir
|
||||
// CHECK-PHASES: 12: backend, {11}, assembler, (host-openmp)
|
||||
// CHECK-PHASES: 13: assembler, {12}, object, (host-openmp)
|
||||
// CHECK-PHASES: 14: clang-linker-wrapper, {13}, image, (host-openmp)
|
||||
|
||||
// handling of --libomptarget-amdgpu-bc-path
|
||||
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx803 --libomptarget-amdgpu-bc-path=%S/Inputs/hip_dev_lib/libomptarget-amdgpu-gfx803.bc %s 2>&1 | FileCheck %s --check-prefix=CHECK-LIBOMPTARGET
|
||||
// CHECK-LIBOMPTARGET: "-cc1" "-triple" "amdgcn-amd-amdhsa"{{.*}}"-target-cpu" "gfx803" "-fcuda-is-device" "-mlink-builtin-bitcode"{{.*}}Inputs/hip_dev_lib/libomptarget-amdgpu-gfx803.bc"{{.*}}
|
||||
|
||||
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx803 -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-NOGPULIB
|
||||
// CHECK-NOGPULIB-NOT: "-cc1" "-triple" "amdgcn-amd-amdhsa"{{.*}}"-target-cpu" "gfx803" "-fcuda-is-device" "-mlink-builtin-bitcode"{{.*}}libomptarget-amdgpu-gfx803.bc"{{.*}}
|
||||
|
||||
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx803 -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-BINDINGS
|
||||
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa --offload-arch=gfx803 -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-BINDINGS
|
||||
// CHECK-BINDINGS: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[INPUT:.+]]"], output: "[[HOST_BC:.+]]"
|
||||
// CHECK-BINDINGS: "amdgcn-amd-amdhsa" - "clang", inputs: ["[[INPUT]]", "[[HOST_BC]]"], output: "[[DEVICE_BC:.+]]"
|
||||
// CHECK-BINDINGS: "x86_64-unknown-linux-gnu" - "Offload::Packager", inputs: ["[[DEVICE_BC]]"], output: "[[BINARY:.+]]"
|
||||
// CHECK-BINDINGS: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[HOST_BC]]", "[[BINARY]]"], output: "[[HOST_OBJ:.+]]"
|
||||
// CHECK-BINDINGS: "x86_64-unknown-linux-gnu" - "Offload::Linker", inputs: ["[[HOST_OBJ]]"], output: "a.out"
|
||||
|
||||
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -emit-llvm -S -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx803 -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-EMIT-LLVM-IR
|
||||
// CHECK-EMIT-LLVM-IR: "-cc1" "-triple" "amdgcn-amd-amdhsa"{{.*}}"-emit-llvm"
|
||||
|
||||
// RUN: %clang -### -target x86_64-pc-linux-gnu -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx803 -lm --rocm-device-lib-path=%S/Inputs/rocm/amdgcn/bitcode -fopenmp-new-driver %s 2>&1 | FileCheck %s --check-prefix=CHECK-LIB-DEVICE-NEW
|
||||
// CHECK-LIB-DEVICE-NEW: {{.*}}clang-linker-wrapper{{.*}}--bitcode-library=openmp-amdgcn-amd-amdhsa-gfx803={{.*}}ocml.bc"{{.*}}ockl.bc"{{.*}}oclc_daz_opt_on.bc"{{.*}}oclc_unsafe_math_off.bc"{{.*}}oclc_finite_only_off.bc"{{.*}}oclc_correctly_rounded_sqrt_on.bc"{{.*}}oclc_wavefrontsize64_on.bc"{{.*}}oclc_isa_version_803.bc"
|
|
@ -1,79 +1,53 @@
|
|||
// REQUIRES: x86-registered-target
|
||||
// REQUIRES: amdgpu-registered-target
|
||||
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fopenmp -fno-openmp-new-driver -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx906 --libomptarget-amdgpu-bc-path=%S/Inputs/hip_dev_lib %s 2>&1 \
|
||||
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa \
|
||||
// RUN: -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx906 --libomptarget-amdgpu-bc-path=%S/Inputs/hip_dev_lib %s 2>&1 \
|
||||
// RUN: | FileCheck %s
|
||||
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa \
|
||||
// RUN: --offload-arch=gfx906 --libomptarget-amdgpu-bc-path=%S/Inputs/hip_dev_lib %s 2>&1 \
|
||||
// RUN: | FileCheck %s
|
||||
|
||||
// verify the tools invocations
|
||||
// CHECK: "-cc1" "-triple" "x86_64-unknown-linux-gnu"{{.*}}"-x" "c"
|
||||
// CHECK: "-cc1" "-triple" "x86_64-unknown-linux-gnu"{{.*}}"-x" "ir"
|
||||
// CHECK: "-cc1" "-triple" "amdgcn-amd-amdhsa"{{.*}}"-emit-llvm-bc"{{.*}}"-target-cpu" "gfx906" "-fcuda-is-device"{{.*}}"-mlink-builtin-bitcode"{{.*}}libomptarget-amdgpu-gfx906.bc"{{.*}}
|
||||
// CHECK: llvm-link{{.*}}"-o" "{{.*}}amdgpu-openmp-toolchain-{{.*}}-gfx906-linked-{{.*}}.bc"
|
||||
// CHECK: llc{{.*}}amdgpu-openmp-toolchain-{{.*}}-gfx906-linked-{{.*}}.bc" "-mtriple=amdgcn-amd-amdhsa" "-mcpu=gfx906" "-filetype=obj" "-o"{{.*}}amdgpu-openmp-toolchain-{{.*}}-gfx906-{{.*}}.o"
|
||||
// CHECK: lld{{.*}}"-flavor" "gnu" "--no-undefined" "-shared" "-o"{{.*}}amdgpu-openmp-toolchain-{{.*}}.out" "{{.*}}amdgpu-openmp-toolchain-{{.*}}-gfx906-{{.*}}.o"
|
||||
// CHECK: clang-offload-wrapper{{.*}}"-target" "x86_64-unknown-linux-gnu" "-o" "{{.*}}a-{{.*}}.bc" {{.*}}amdgpu-openmp-toolchain-{{.*}}.out"
|
||||
// CHECK: "-cc1" "-triple" "x86_64-unknown-linux-gnu"{{.*}}"-o" "{{.*}}a-{{.*}}.o" "-x" "ir" "{{.*}}a-{{.*}}.bc"
|
||||
// CHECK: ld{{.*}}"-o" "a.out"{{.*}}"{{.*}}amdgpu-openmp-toolchain-{{.*}}.o" "{{.*}}a-{{.*}}.o" "-lomp" "-lomptarget"
|
||||
// CHECK: "-cc1" "-triple" "x86_64-unknown-linux-gnu"{{.*}}"-emit-llvm-bc"{{.*}}"-x" "c"
|
||||
// CHECK: "-cc1" "-triple" "amdgcn-amd-amdhsa" "-aux-triple" "x86_64-unknown-linux-gnu"{{.*}}"-target-cpu" "gfx906"{{.*}}"-fcuda-is-device"{{.*}}"-mlink-builtin-bitcode" "{{.*}}libomptarget-amdgpu-gfx906.bc"
|
||||
// CHECK: "-cc1" "-triple" "x86_64-unknown-linux-gnu"{{.*}}"-emit-obj"
|
||||
// CHECK: clang-linker-wrapper{{.*}}"--"{{.*}} "-o" "a.out"
|
||||
|
||||
// RUN: %clang -ccc-print-phases --target=x86_64-unknown-linux-gnu -fopenmp -fno-openmp-new-driver -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx906 %s 2>&1 \
|
||||
// RUN: %clang -ccc-print-phases --target=x86_64-unknown-linux-gnu -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx906 %s 2>&1 \
|
||||
// RUN: | FileCheck --check-prefix=CHECK-PHASES %s
|
||||
// phases
|
||||
// CHECK-PHASES: 0: input, "{{.*}}amdgpu-openmp-toolchain.c", c, (host-openmp)
|
||||
// CHECK-PHASES: 0: input, "[[INPUT:.+]]", c, (host-openmp)
|
||||
// CHECK-PHASES: 1: preprocessor, {0}, cpp-output, (host-openmp)
|
||||
// CHECK-PHASES: 2: compiler, {1}, ir, (host-openmp)
|
||||
// CHECK-PHASES: 3: backend, {2}, assembler, (host-openmp)
|
||||
// CHECK-PHASES: 4: assembler, {3}, object, (host-openmp)
|
||||
// CHECK-PHASES: 5: input, "{{.*}}amdgpu-openmp-toolchain.c", c, (device-openmp)
|
||||
// CHECK-PHASES: 6: preprocessor, {5}, cpp-output, (device-openmp)
|
||||
// CHECK-PHASES: 7: compiler, {6}, ir, (device-openmp)
|
||||
// CHECK-PHASES: 8: offload, "host-openmp (x86_64-unknown-linux-gnu)" {2}, "device-openmp (amdgcn-amd-amdhsa)" {7}, ir
|
||||
// CHECK-PHASES: 9: backend, {8}, assembler, (device-openmp)
|
||||
// CHECK-PHASES: 10: assembler, {9}, object, (device-openmp)
|
||||
// CHECK-PHASES: 11: linker, {10}, image, (device-openmp)
|
||||
// CHECK-PHASES: 12: offload, "device-openmp (amdgcn-amd-amdhsa)" {11}, image
|
||||
// CHECK-PHASES: 13: clang-offload-wrapper, {12}, ir, (host-openmp)
|
||||
// CHECK-PHASES: 14: backend, {13}, assembler, (host-openmp)
|
||||
// CHECK-PHASES: 15: assembler, {14}, object, (host-openmp)
|
||||
// CHECK-PHASES: 16: linker, {4, 15}, image, (host-openmp)
|
||||
// CHECK-PHASES: 3: input, "[[INPUT]]", c, (device-openmp)
|
||||
// CHECK-PHASES: 4: preprocessor, {3}, cpp-output, (device-openmp)
|
||||
// CHECK-PHASES: 5: compiler, {4}, ir, (device-openmp)
|
||||
// CHECK-PHASES: 6: offload, "host-openmp (x86_64-unknown-linux-gnu)" {2}, "device-openmp (amdgcn-amd-amdhsa)" {5}, ir
|
||||
// CHECK-PHASES: 7: backend, {6}, assembler, (device-openmp)
|
||||
// CHECK-PHASES: 8: assembler, {7}, object, (device-openmp)
|
||||
// CHECK-PHASES: 9: offload, "device-openmp (amdgcn-amd-amdhsa)" {8}, object
|
||||
// CHECK-PHASES: 10: clang-offload-packager, {9}, image
|
||||
// CHECK-PHASES: 11: offload, "host-openmp (x86_64-unknown-linux-gnu)" {2}, " (x86_64-unknown-linux-gnu)" {10}, ir
|
||||
// CHECK-PHASES: 12: backend, {11}, assembler, (host-openmp)
|
||||
// CHECK-PHASES: 13: assembler, {12}, object, (host-openmp)
|
||||
// CHECK-PHASES: 14: clang-linker-wrapper, {13}, image, (host-openmp)
|
||||
|
||||
// handling of --libomptarget-amdgpu-bc-path
|
||||
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fopenmp -fno-openmp-new-driver -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx803 --libomptarget-amdgpu-bc-path=%S/Inputs/hip_dev_lib/libomptarget-amdgpu-gfx803.bc %s 2>&1 | FileCheck %s --check-prefix=CHECK-LIBOMPTARGET
|
||||
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx803 --libomptarget-amdgpu-bc-path=%S/Inputs/hip_dev_lib/libomptarget-amdgpu-gfx803.bc %s 2>&1 | FileCheck %s --check-prefix=CHECK-LIBOMPTARGET
|
||||
// CHECK-LIBOMPTARGET: "-cc1" "-triple" "amdgcn-amd-amdhsa"{{.*}}"-target-cpu" "gfx803" "-fcuda-is-device" "-mlink-builtin-bitcode"{{.*}}Inputs/hip_dev_lib/libomptarget-amdgpu-gfx803.bc"{{.*}}
|
||||
|
||||
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fopenmp -fno-openmp-new-driver -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx803 -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-NOGPULIB
|
||||
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx803 -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-NOGPULIB
|
||||
// CHECK-NOGPULIB-NOT: "-cc1" "-triple" "amdgcn-amd-amdhsa"{{.*}}"-target-cpu" "gfx803" "-fcuda-is-device" "-mlink-builtin-bitcode"{{.*}}libomptarget-amdgpu-gfx803.bc"{{.*}}
|
||||
|
||||
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -save-temps -fopenmp -fno-openmp-new-driver -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx803 -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-PRINT-BINDINGS
|
||||
// CHECK-PRINT-BINDINGS: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[INPUT:.*]]"],
|
||||
// CHECK-PRINT-BINDINGS: "x86_64-unknown-linux-gnu" - "clang",{{.*}} output: "[[HOST_BC:.*]]"
|
||||
// CHECK-PRINT-BINDINGS: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[HOST_BC]]"], output: "[[HOST_S:.*]]"
|
||||
// CHECK-PRINT-BINDINGS: "x86_64-unknown-linux-gnu" - "clang::as", inputs: ["[[HOST_S]]"], output: "[[HOST_O:.*]]"
|
||||
// CHECK-PRINT-BINDINGS: "amdgcn-amd-amdhsa" - "clang", inputs: ["[[INPUT]]"], output: "[[DEVICE_I:.*]]"
|
||||
// CHECK-PRINT-BINDINGS: "amdgcn-amd-amdhsa" - "clang", inputs: ["[[DEVICE_I]]", "[[HOST_BC]]"], output: "[[DEVICE_BC:.*]]"
|
||||
// CHECK-PRINT-BINDINGS: "amdgcn-amd-amdhsa" - "AMDGCN::OpenMPLinker", inputs: ["[[DEVICE_BC]]"], output: "[[DEVICE_OUT:.*]]"
|
||||
// CHECK-PRINT-BINDINGS: "x86_64-unknown-linux-gnu" - "offload wrapper", inputs: ["[[DEVICE_OUT]]"], output: "[[OFFLOAD_WRAPPER:.*]]"
|
||||
// CHECK-PRINT-BINDINGS: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[OFFLOAD_WRAPPER]]"], output: "[[OFFLOAD_S:.*]]"
|
||||
// CHECK-PRINT-BINDINGS: "x86_64-unknown-linux-gnu" - "clang::as", inputs: ["[[OFFLOAD_S]]"], output: "[[OFFLOAD_O:.*]]"
|
||||
// CHECK-PRINT-BINDINGS: "x86_64-unknown-linux-gnu" - "GNU::Linker", inputs: ["[[HOST_O]]", "[[OFFLOAD_O]]"], output:
|
||||
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx803 -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-BINDINGS
|
||||
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa --offload-arch=gfx803 -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-BINDINGS
|
||||
// CHECK-BINDINGS: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[INPUT:.+]]"], output: "[[HOST_BC:.+]]"
|
||||
// CHECK-BINDINGS: "amdgcn-amd-amdhsa" - "clang", inputs: ["[[INPUT]]", "[[HOST_BC]]"], output: "[[DEVICE_BC:.+]]"
|
||||
// CHECK-BINDINGS: "x86_64-unknown-linux-gnu" - "Offload::Packager", inputs: ["[[DEVICE_BC]]"], output: "[[BINARY:.+]]"
|
||||
// CHECK-BINDINGS: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[HOST_BC]]", "[[BINARY]]"], output: "[[HOST_OBJ:.+]]"
|
||||
// CHECK-BINDINGS: "x86_64-unknown-linux-gnu" - "Offload::Linker", inputs: ["[[HOST_OBJ]]"], output: "a.out"
|
||||
|
||||
// verify the llc is invoked for textual assembly output
|
||||
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fopenmp -fno-openmp-new-driver -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx906 --libomptarget-amdgpu-bc-path=%S/Inputs/hip_dev_lib -save-temps %s 2>&1 \
|
||||
// RUN: | FileCheck %s --check-prefix=CHECK-SAVE-ASM
|
||||
// CHECK-SAVE-ASM: llc{{.*}}amdgpu-openmp-toolchain-{{.*}}-gfx906-linked.bc" "-mtriple=amdgcn-amd-amdhsa" "-mcpu=gfx906" "-filetype=asm" "-o"{{.*}}amdgpu-openmp-toolchain-{{.*}}-gfx906.s"
|
||||
// CHECK-SAVE-ASM: llc{{.*}}amdgpu-openmp-toolchain-{{.*}}-gfx906-linked.bc" "-mtriple=amdgcn-amd-amdhsa" "-mcpu=gfx906" "-filetype=obj" "-o"{{.*}}amdgpu-openmp-toolchain-{{.*}}-gfx906.o"
|
||||
|
||||
// check the handling of -c
|
||||
// RUN: %clang -ccc-print-bindings -c --target=x86_64-unknown-linux-gnu -fopenmp -fno-openmp-new-driver -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx906 --libomptarget-amdgpu-bc-path=%S/Inputs/hip_dev_lib -save-temps %s 2>&1 \
|
||||
// RUN: | FileCheck %s --check-prefix=CHECK-C
|
||||
// CHECK-C: "x86_64-unknown-linux-gnu" - "clang",
|
||||
// CHECK-C: "x86_64-unknown-linux-gnu" - "clang",{{.*}}output: "[[HOST_BC:.*]]"
|
||||
// CHECK-C: "amdgcn-amd-amdhsa" - "clang",{{.*}}output: "[[DEVICE_I:.*]]"
|
||||
// CHECK-C: "amdgcn-amd-amdhsa" - "clang", inputs: ["[[DEVICE_I]]", "[[HOST_BC]]"]
|
||||
// CHECK-C: "x86_64-unknown-linux-gnu" - "clang"
|
||||
// CHECK-C: "x86_64-unknown-linux-gnu" - "clang::as"
|
||||
// CHECK-C: "x86_64-unknown-linux-gnu" - "offload bundler"
|
||||
|
||||
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -emit-llvm -S -fopenmp -fno-openmp-new-driver -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx803 -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-EMIT-LLVM-IR
|
||||
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -emit-llvm -S -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx803 -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-EMIT-LLVM-IR
|
||||
// CHECK-EMIT-LLVM-IR: "-cc1" "-triple" "amdgcn-amd-amdhsa"{{.*}}"-emit-llvm"
|
||||
|
||||
// RUN: %clang -### -target x86_64-pc-linux-gnu -fopenmp -fno-openmp-new-driver -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx803 -lm --rocm-device-lib-path=%S/Inputs/rocm/amdgcn/bitcode %s 2>&1 | FileCheck %s --check-prefix=CHECK-LIB-DEVICE
|
||||
// CHECK-LIB-DEVICE: {{.*}}llvm-link{{.*}}ocml.bc"{{.*}}ockl.bc"{{.*}}oclc_daz_opt_on.bc"{{.*}}oclc_unsafe_math_off.bc"{{.*}}oclc_finite_only_off.bc"{{.*}}oclc_correctly_rounded_sqrt_on.bc"{{.*}}oclc_wavefrontsize64_on.bc"{{.*}}oclc_isa_version_803.bc"
|
||||
// RUN: %clang -### -target x86_64-pc-linux-gnu -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx803 -lm --rocm-device-lib-path=%S/Inputs/rocm/amdgcn/bitcode -fopenmp-new-driver %s 2>&1 | FileCheck %s --check-prefix=CHECK-LIB-DEVICE-NEW
|
||||
// CHECK-LIB-DEVICE-NEW: {{.*}}clang-linker-wrapper{{.*}}--bitcode-library=openmp-amdgcn-amd-amdhsa-gfx803={{.*}}ocml.bc"{{.*}}ockl.bc"{{.*}}oclc_daz_opt_on.bc"{{.*}}oclc_unsafe_math_off.bc"{{.*}}oclc_finite_only_off.bc"{{.*}}oclc_correctly_rounded_sqrt_on.bc"{{.*}}oclc_wavefrontsize64_on.bc"{{.*}}oclc_isa_version_803.bc"
|
||||
|
|
|
@ -1,77 +0,0 @@
|
|||
// REQUIRES: x86-registered-target
|
||||
|
||||
//
|
||||
// Check help message.
|
||||
//
|
||||
// RUN: clang-offload-wrapper --help | FileCheck %s --check-prefix CHECK-HELP
|
||||
// CHECK-HELP: {{.*}}OVERVIEW: A tool to create a wrapper bitcode for offload target binaries. Takes offload
|
||||
// CHECK-HELP: {{.*}}target binaries as input and produces bitcode file containing target binaries packaged
|
||||
// CHECK-HELP: {{.*}}as data and initialization code which registers target binaries in offload runtime.
|
||||
// CHECK-HELP: {{.*}}USAGE: clang-offload-wrapper [options] <input files>
|
||||
// CHECK-HELP: {{.*}} -o <filename> - Output filename
|
||||
// CHECK-HELP: {{.*}} --target=<triple> - Target triple for the output module
|
||||
|
||||
//
|
||||
// Generate a file to wrap.
|
||||
//
|
||||
// RUN: echo 'Content of device file' > %t.tgt
|
||||
|
||||
//
|
||||
// Check bitcode produced by the wrapper tool.
|
||||
//
|
||||
// RUN: clang-offload-wrapper -add-omp-offload-notes -target=x86_64-pc-linux-gnu -o %t.wrapper.bc %t.tgt 2>&1 | FileCheck %s --check-prefix ELF-WARNING
|
||||
// RUN: llvm-dis %t.wrapper.bc -o - | FileCheck %s --check-prefix CHECK-IR
|
||||
|
||||
// ELF-WARNING: is not an ELF image, so notes cannot be added to it.
|
||||
// CHECK-IR: target triple = "x86_64-pc-linux-gnu"
|
||||
|
||||
// CHECK-IR-DAG: [[ENTTY:%.+]] = type { ptr, ptr, i{{32|64}}, i32, i32 }
|
||||
// CHECK-IR-DAG: [[IMAGETY:%.+]] = type { ptr, ptr, ptr, ptr }
|
||||
// CHECK-IR-DAG: [[DESCTY:%.+]] = type { i32, ptr, ptr, ptr }
|
||||
|
||||
// CHECK-IR: [[ENTBEGIN:@.+]] = external hidden constant [[ENTTY]]
|
||||
// CHECK-IR: [[ENTEND:@.+]] = external hidden constant [[ENTTY]]
|
||||
|
||||
// CHECK-IR: [[DUMMY:@.+]] = hidden constant [0 x [[ENTTY]]] zeroinitializer, section "omp_offloading_entries"
|
||||
|
||||
// CHECK-IR: [[BIN:@.+]] = internal unnamed_addr constant [[BINTY:\[[0-9]+ x i8\]]] c"Content of device file{{.+}}"
|
||||
|
||||
// CHECK-IR: [[IMAGES:@.+]] = internal unnamed_addr constant [1 x [[IMAGETY]]] [{{.+}} { ptr [[BIN]], ptr getelementptr inbounds ([[BINTY]], ptr [[BIN]], i64 1, i64 0), ptr [[ENTBEGIN]], ptr [[ENTEND]] }]
|
||||
|
||||
// CHECK-IR: [[DESC:@.+]] = internal constant [[DESCTY]] { i32 1, ptr [[IMAGES]], ptr [[ENTBEGIN]], ptr [[ENTEND]] }
|
||||
|
||||
// CHECK-IR: @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr [[REGFN:@.+]], ptr null }]
|
||||
// CHECK-IR: @llvm.global_dtors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr [[UNREGFN:@.+]], ptr null }]
|
||||
|
||||
// CHECK-IR: define internal void [[REGFN]]()
|
||||
// CHECK-IR: call void @__tgt_register_lib(ptr [[DESC]])
|
||||
// CHECK-IR: ret void
|
||||
|
||||
// CHECK-IR: declare void @__tgt_register_lib(ptr)
|
||||
|
||||
// CHECK-IR: define internal void [[UNREGFN]]()
|
||||
// CHECK-IR: call void @__tgt_unregister_lib(ptr [[DESC]])
|
||||
// CHECK-IR: ret void
|
||||
|
||||
// CHECK-IR: declare void @__tgt_unregister_lib(ptr)
|
||||
|
||||
// Check that clang-offload-wrapper adds LLVMOMPOFFLOAD notes
|
||||
// into the ELF offload images:
|
||||
// RUN: yaml2obj %S/Inputs/empty-elf-template.yaml -o %t.64le -DBITS=64 -DENCODING=LSB
|
||||
// RUN: clang-offload-wrapper -add-omp-offload-notes -target=x86_64-pc-linux-gnu -o %t.wrapper.elf64le.bc %t.64le
|
||||
// RUN: llvm-dis %t.wrapper.elf64le.bc -o - | FileCheck %s --check-prefix OMPNOTES
|
||||
// RUN: yaml2obj %S/Inputs/empty-elf-template.yaml -o %t.64be -DBITS=64 -DENCODING=MSB
|
||||
// RUN: clang-offload-wrapper -add-omp-offload-notes -target=x86_64-pc-linux-gnu -o %t.wrapper.elf64be.bc %t.64be
|
||||
// RUN: llvm-dis %t.wrapper.elf64be.bc -o - | FileCheck %s --check-prefix OMPNOTES
|
||||
// RUN: yaml2obj %S/Inputs/empty-elf-template.yaml -o %t.32le -DBITS=32 -DENCODING=LSB
|
||||
// RUN: clang-offload-wrapper -add-omp-offload-notes -target=x86_64-pc-linux-gnu -o %t.wrapper.elf32le.bc %t.32le
|
||||
// RUN: llvm-dis %t.wrapper.elf32le.bc -o - | FileCheck %s --check-prefix OMPNOTES
|
||||
// RUN: yaml2obj %S/Inputs/empty-elf-template.yaml -o %t.32be -DBITS=32 -DENCODING=MSB
|
||||
// RUN: clang-offload-wrapper -add-omp-offload-notes -target=x86_64-pc-linux-gnu -o %t.wrapper.elf32be.bc %t.32be
|
||||
// RUN: llvm-dis %t.wrapper.elf32be.bc -o - | FileCheck %s --check-prefix OMPNOTES
|
||||
|
||||
// There is no clean way for extracting the offload image
|
||||
// from the object file currently, so try to find
|
||||
// the inserted ELF notes in the device image variable's
|
||||
// initializer:
|
||||
// OMPNOTES: @{{.+}} = internal unnamed_addr constant [{{[0-9]+}} x i8] c"{{.*}}LLVMOMPOFFLOAD{{.*}}LLVMOMPOFFLOAD{{.*}}LLVMOMPOFFLOAD{{.*}}"
|
|
@ -1,80 +0,0 @@
|
|||
// REQUIRES: x86-registered-target
|
||||
// REQUIRES: amdgpu-registered-target
|
||||
// UNSUPPORTED: -aix
|
||||
|
||||
// See the steps to create a fat archive are given at the end of the file.
|
||||
|
||||
// Given a FatArchive, clang-offload-bundler should be called to create a
|
||||
// device specific archive, which should be passed to llvm-link.
|
||||
// RUN: %clang -O2 -### -fopenmp -fno-openmp-new-driver -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx906 %s -L%S/Inputs/openmp_static_device_link -lFatArchive 2>&1 | FileCheck %s
|
||||
// CHECK: "-cc1"{{.*}}"-triple" "amdgcn-amd-amdhsa"{{.*}}"-emit-llvm-bc"{{.*}}"-target-cpu" "[[GPU:gfx[0-9]+]]"{{.*}}"-o" "[[HOSTBC:.*.bc]]" "-x" "c++"{{.*}}.cpp
|
||||
// CHECK: clang-offload-bundler" "-unbundle" "-type=a" "-input={{.*}}/Inputs/openmp_static_device_link/libFatArchive.a" "-targets=openmp-amdgcn-amd-amdhsa-[[GPU]]" "-output=[[DEVICESPECIFICARCHIVE:.*.a]]" "-allow-missing-bundles"
|
||||
// CHECK: llvm-link{{.*}}"[[HOSTBC]]" "[[DEVICESPECIFICARCHIVE]]" "-o" "{{.*}}-[[GPU]]-linked-{{.*}}.bc"
|
||||
// expected-no-diagnostics
|
||||
|
||||
#ifndef HEADER
|
||||
#define HEADER
|
||||
|
||||
#define N 10
|
||||
|
||||
#pragma omp declare target
|
||||
// Functions defined in Fat Archive.
|
||||
extern "C" void func_present(float *, float *, unsigned);
|
||||
|
||||
#ifdef MISSING
|
||||
// Function not defined in the fat archive.
|
||||
extern "C" void func_missing(float *, float *, unsigned);
|
||||
#endif
|
||||
|
||||
#pragma omp end declare target
|
||||
|
||||
int main() {
|
||||
float in[N], out[N], sum = 0;
|
||||
unsigned i;
|
||||
|
||||
#pragma omp parallel for
|
||||
for (i = 0; i < N; ++i) {
|
||||
in[i] = i;
|
||||
}
|
||||
|
||||
func_present(in, out, N); // Returns out[i] = a[i] * 0
|
||||
|
||||
#ifdef MISSING
|
||||
func_missing(in, out, N); // Should throw an error here
|
||||
#endif
|
||||
|
||||
#pragma omp parallel for reduction(+ \
|
||||
: sum)
|
||||
for (i = 0; i < N; ++i)
|
||||
sum += out[i];
|
||||
|
||||
if (!sum)
|
||||
return 0;
|
||||
return sum;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
/***********************************************
|
||||
Steps to create Fat Archive (libFatArchive.a)
|
||||
************************************************
|
||||
***************** File: func_1.c ***************
|
||||
void func_present(float* in, float* out, unsigned n){
|
||||
unsigned i;
|
||||
#pragma omp target teams distribute parallel for map(to: in[0:n]) map(from: out[0:n])
|
||||
for(i=0; i<n; ++i){
|
||||
out[i] = in[i] * 0;
|
||||
}
|
||||
}
|
||||
*************************************************
|
||||
1. Compile source file(s) to generate object file(s)
|
||||
clang -O2 -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx906 -c func_1.c -o func_1_gfx906.o
|
||||
clang -O2 -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx908 -c func_1.c -o func_1_gfx908.o
|
||||
clang -O2 -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx906 -c func_2.c -o func_2_gfx906.o
|
||||
clang -O2 -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx908 -c func_2.c -o func_2_gfx908.o
|
||||
clang -O2 -fopenmp -fopenmp-targets=nvptx64 -c func_1.c -o func_1_nvptx.o
|
||||
clang -O2 -fopenmp -fopenmp-targets=nvptx64 -c func_2.c -o func_2_nvptx.o
|
||||
|
||||
2. Create a fat archive by combining all the object file(s)
|
||||
llvm-ar cr libFatArchive.a func_1_gfx906.o func_1_gfx908.o func_2_gfx906.o func_2_gfx908.o func_1_nvptx.o func_2_nvptx.o
|
||||
************************************************/
|
|
@ -1,80 +0,0 @@
|
|||
// REQUIRES: x86-registered-target
|
||||
// REQUIRES: nvptx-registered-target
|
||||
|
||||
// See the steps to create a fat archive are given at the end of the file.
|
||||
|
||||
// Given a FatArchive, clang-offload-bundler should be called to create a
|
||||
// device specific archive, which should be passed to clang-nvlink-wrapper.
|
||||
// RUN: %clang -O2 -### -fopenmp -fno-openmp-new-driver -fopenmp-targets=nvptx64-nvidia-cuda %s -L%S/Inputs/openmp_static_device_link -lFatArchive 2>&1 | FileCheck %s
|
||||
// CHECK: "-cc1"{{.*}}"-triple" "nvptx64-nvidia-cuda"{{.*}}"-target-cpu" "[[GPU:sm_[0-9]+]]"{{.*}}"-o" "[[HOSTBC:.*.s]]" "-x" "c++"{{.*}}.cpp
|
||||
// CHECK: clang-offload-bundler" "-unbundle" "-type=a" "-input={{.*}}/Inputs/openmp_static_device_link/libFatArchive.a" "-targets=openmp-nvptx64-nvidia-cuda-[[GPU]]" "-output=[[DEVICESPECIFICARCHIVE:.*.a]]" "-allow-missing-bundles"
|
||||
// CHECK: clang-nvlink-wrapper{{.*}}"-o" "{{.*}}.out" "-arch" "[[GPU]]" "{{.*}}[[DEVICESPECIFICARCHIVE]]"
|
||||
// RUN: not %clang -fopenmp -fno-openmp-new-driver -fopenmp-targets=nvptx64-nvidia-cuda %s %S/Inputs/openmp_static_device_link/empty.o --libomptarget-nvptx-bc-path=%S/Inputs/openmp_static_device_link/lib.bc 2>&1 | FileCheck %s --check-prefix=EMPTY
|
||||
// EMPTY-NOT: Could not open input file
|
||||
|
||||
#ifndef HEADER
|
||||
#define HEADER
|
||||
|
||||
#define N 10
|
||||
|
||||
#pragma omp declare target
|
||||
// Functions defined in Fat Archive.
|
||||
extern "C" void func_present(float *, float *, unsigned);
|
||||
|
||||
#ifdef MISSING
|
||||
// Function not defined in the fat archive.
|
||||
extern "C" void func_missing(float *, float *, unsigned);
|
||||
#endif
|
||||
|
||||
#pragma omp end declare target
|
||||
|
||||
int main() {
|
||||
float in[N], out[N], sum = 0;
|
||||
unsigned i;
|
||||
|
||||
#pragma omp parallel for
|
||||
for (i = 0; i < N; ++i) {
|
||||
in[i] = i;
|
||||
}
|
||||
|
||||
func_present(in, out, N); // Returns out[i] = a[i] * 0
|
||||
|
||||
#ifdef MISSING
|
||||
func_missing(in, out, N); // Should throw an error here
|
||||
#endif
|
||||
|
||||
#pragma omp parallel for reduction(+ \
|
||||
: sum)
|
||||
for (i = 0; i < N; ++i)
|
||||
sum += out[i];
|
||||
|
||||
if (!sum)
|
||||
return 0;
|
||||
return sum;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
/***********************************************
|
||||
Steps to create Fat Archive (libFatArchive.a)
|
||||
************************************************
|
||||
***************** File: func_1.c ***************
|
||||
void func_present(float* in, float* out, unsigned n){
|
||||
unsigned i;
|
||||
#pragma omp target teams distribute parallel for map(to: in[0:n]) map(from: out[0:n])
|
||||
for(i=0; i<n; ++i){
|
||||
out[i] = in[i] * 0;
|
||||
}
|
||||
}
|
||||
*************************************************
|
||||
1. Compile source file(s) to generate object file(s)
|
||||
clang -O2 -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx906 -c func_1.c -o func_1_gfx906.o
|
||||
clang -O2 -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx908 -c func_1.c -o func_1_gfx908.o
|
||||
clang -O2 -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx906 -c func_2.c -o func_2_gfx906.o
|
||||
clang -O2 -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx908 -c func_2.c -o func_2_gfx908.o
|
||||
clang -O2 -fopenmp -fopenmp-targets=nvptx64-nvidia-cuda -c func_1.c -o func_1_nvptx.o
|
||||
clang -O2 -fopenmp -fopenmp-targets=nvptx64-nvidia-cuda -c func_2.c -o func_2_nvptx.o
|
||||
|
||||
2. Create a fat archive by combining all the object file(s)
|
||||
llvm-ar cr libFatArchive.a func_1_gfx906.o func_1_gfx908.o func_2_gfx906.o func_2_gfx908.o func_1_nvptx.o func_2_nvptx.o
|
||||
************************************************/
|
|
@ -1,133 +0,0 @@
|
|||
///
|
||||
/// Perform several driver tests for OpenMP offloading
|
||||
///
|
||||
|
||||
// REQUIRES: x86-registered-target
|
||||
// REQUIRES: nvptx-registered-target
|
||||
// REQUIRES: amdgpu-registered-target
|
||||
|
||||
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fopenmp -fopenmp-targets=nvptx64-nvidia-cuda \
|
||||
// RUN: -Xopenmp-target=nvptx64-nvidia-cuda -march=sm_52 \
|
||||
// RUN: --libomptarget-nvptx-bc-path=%S/Inputs/libomptarget/libomptarget-nvptx-test.bc %s 2>&1 \
|
||||
// RUN: | FileCheck %s
|
||||
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fopenmp -fopenmp-targets=nvptx64-nvidia-cuda \
|
||||
// RUN: --offload-arch=sm_52 \
|
||||
// RUN: --libomptarget-nvptx-bc-path=%S/Inputs/libomptarget/libomptarget-nvptx-test.bc %s 2>&1 \
|
||||
// RUN: | FileCheck %s
|
||||
|
||||
// verify the tools invocations
|
||||
// CHECK: "-cc1" "-triple" "x86_64-unknown-linux-gnu"{{.*}}"-emit-llvm-bc"{{.*}}"-x" "c"
|
||||
// CHECK: "-cc1" "-triple" "nvptx64-nvidia-cuda" "-aux-triple" "x86_64-unknown-linux-gnu"{{.*}}"-target-cpu" "sm_52"
|
||||
// CHECK: "-cc1" "-triple" "x86_64-unknown-linux-gnu"{{.*}}"-emit-obj"
|
||||
// CHECK: clang-linker-wrapper{{.*}}"--"{{.*}} "-o" "a.out"
|
||||
|
||||
// RUN: %clang -ccc-print-phases --target=x86_64-unknown-linux-gnu -fopenmp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target=nvptx64-nvidia-cuda -march=sm_52 %s 2>&1 \
|
||||
// RUN: | FileCheck --check-prefix=CHECK-PHASES %s
|
||||
// CHECK-PHASES: 0: input, "[[INPUT:.+]]", c, (host-openmp)
|
||||
// CHECK-PHASES: 1: preprocessor, {0}, cpp-output, (host-openmp)
|
||||
// CHECK-PHASES: 2: compiler, {1}, ir, (host-openmp)
|
||||
// CHECK-PHASES: 3: input, "[[INPUT]]", c, (device-openmp)
|
||||
// CHECK-PHASES: 4: preprocessor, {3}, cpp-output, (device-openmp)
|
||||
// CHECK-PHASES: 5: compiler, {4}, ir, (device-openmp)
|
||||
// CHECK-PHASES: 6: offload, "host-openmp (x86_64-unknown-linux-gnu)" {2}, "device-openmp (nvptx64-nvidia-cuda)" {5}, ir
|
||||
// CHECK-PHASES: 7: backend, {6}, assembler, (device-openmp)
|
||||
// CHECK-PHASES: 8: assembler, {7}, object, (device-openmp)
|
||||
// CHECK-PHASES: 9: offload, "device-openmp (nvptx64-nvidia-cuda)" {8}, object
|
||||
// CHECK-PHASES: 10: clang-offload-packager, {9}, image
|
||||
// CHECK-PHASES: 11: offload, "host-openmp (x86_64-unknown-linux-gnu)" {2}, " (x86_64-unknown-linux-gnu)" {10}, ir
|
||||
// CHECK-PHASES: 12: backend, {11}, assembler, (host-openmp)
|
||||
// CHECK-PHASES: 13: assembler, {12}, object, (host-openmp)
|
||||
// CHECK-PHASES: 14: clang-linker-wrapper, {13}, image, (host-openmp)
|
||||
|
||||
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target=nvptx64-nvidia-cuda -march=sm_52 -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-BINDINGS
|
||||
// CHECK-BINDINGS: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[INPUT:.+]]"], output: "[[HOST_BC:.+]]"
|
||||
// CHECK-BINDINGS: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT]]", "[[HOST_BC]]"], output: "[[DEVICE_BC:.+]]"
|
||||
// CHECK-BINDINGS: "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[DEVICE_BC]]"], output: "[[DEVICE_OBJ:.+]]"
|
||||
// CHECK-BINDINGS: "x86_64-unknown-linux-gnu" - "Offload::Packager", inputs: ["[[DEVICE_OBJ]]"], output: "[[BINARY:.+.out]]"
|
||||
// CHECK-BINDINGS: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[HOST_BC]]", "[[BINARY]]"], output: "[[HOST_OBJ:.+]]"
|
||||
// CHECK-BINDINGS: "x86_64-unknown-linux-gnu" - "Offload::Linker", inputs: ["[[HOST_OBJ]]"], output: "a.out"
|
||||
|
||||
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target=nvptx64-nvidia-cuda -march=sm_52 -nogpulib -save-temps %s 2>&1 | FileCheck %s --check-prefix=CHECK-TEMP-BINDINGS
|
||||
// CHECK-TEMP-BINDINGS: "x86_64-unknown-linux-gnu" - "Offload::Packager", inputs: ["[[DEVICE_OBJ:.+]]"], output: "[[BINARY:.+.out]]"
|
||||
|
||||
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp -fopenmp-targets=nvptx64-nvidia-cuda --offload-arch=sm_52 --offload-arch=sm_70 -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-ARCH-BINDINGS
|
||||
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp -fopenmp-targets=nvptx64-nvidia-cuda --offload-arch=sm_52,sm_70 -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-ARCH-BINDINGS
|
||||
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp -fopenmp-targets=nvptx64-nvidia-cuda --offload-arch=sm_52,sm_70,sm_35,sm_80 --no-offload-arch=sm_35,sm_80 -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-ARCH-BINDINGS
|
||||
// CHECK-ARCH-BINDINGS: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[INPUT:.*]]"], output: "[[HOST_BC:.*]]"
|
||||
// CHECK-ARCH-BINDINGS: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT]]", "[[HOST_BC]]"], output: "[[DEVICE_BC_SM_52:.*]]"
|
||||
// CHECK-ARCH-BINDINGS: "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[DEVICE_BC_SM_52]]"], output: "[[DEVICE_OBJ_SM_52:.*]]"
|
||||
// CHECK-ARCH-BINDINGS: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT]]", "[[HOST_BC]]"], output: "[[DEVICE_BC_SM_70:.*]]"
|
||||
// CHECK-ARCH-BINDINGS: "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[DEVICE_BC_SM_70]]"], output: "[[DEVICE_OBJ_SM_70:.*]]"
|
||||
// CHECK-ARCH-BINDINGS: "x86_64-unknown-linux-gnu" - "Offload::Packager", inputs: ["[[DEVICE_OBJ_SM_52]]", "[[DEVICE_OBJ_SM_70]]"], output: "[[BINARY:.*]]"
|
||||
// CHECK-ARCH-BINDINGS: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[HOST_BC]]", "[[BINARY]]"], output: "[[HOST_OBJ:.*]]"
|
||||
// CHECK-ARCH-BINDINGS: "x86_64-unknown-linux-gnu" - "Offload::Linker", inputs: ["[[HOST_OBJ]]"], output: "a.out"
|
||||
|
||||
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp \
|
||||
// RUN: -fopenmp-targets=nvptx64-nvidia-cuda,amdgcn-amd-amdhsa -Xopenmp-target=nvptx64-nvidia-cuda --offload-arch=sm_70 \
|
||||
// RUN: -fopenmp-targets=nvptx64-nvidia-cuda,amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa --offload-arch=gfx908 \
|
||||
// RUN: -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-NVIDIA-AMDGPU
|
||||
|
||||
// CHECK-NVIDIA-AMDGPU: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[INPUT:.+]]"], output: "[[HOST_BC:.+]]"
|
||||
// CHECK-NVIDIA-AMDGPU: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT]]", "[[HOST_BC]]"], output: "[[NVIDIA_PTX:.+]]"
|
||||
// CHECK-NVIDIA-AMDGPU: "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[NVIDIA_PTX]]"], output: "[[NVIDIA_CUBIN:.+]]"
|
||||
// CHECK-NVIDIA-AMDGPU: "amdgcn-amd-amdhsa" - "clang", inputs: ["[[INPUT]]", "[[HOST_BC]]"], output: "[[AMD_BC:.+]]"
|
||||
// CHECK-NVIDIA-AMDGPU: "x86_64-unknown-linux-gnu" - "Offload::Packager", inputs: ["[[NVIDIA_CUBIN]]", "[[AMD_BC]]"], output: "[[BINARY:.*]]"
|
||||
// CHECK-NVIDIA-AMDGPU: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[HOST_BC]]", "[[BINARY]]"], output: "[[HOST_OBJ:.+]]"
|
||||
// CHECK-NVIDIA-AMDGPU: "x86_64-unknown-linux-gnu" - "Offload::Linker", inputs: ["[[HOST_OBJ]]"], output: "a.out"
|
||||
|
||||
// RUN: %clang -x ir -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp --offload-arch=sm_52 -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-IR
|
||||
|
||||
// CHECK-IR: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[INPUT_IR:.+]]"], output: "[[OBJECT:.+]]"
|
||||
// CHECK-IR: "x86_64-unknown-linux-gnu" - "Offload::Linker", inputs: ["[[OBJECT]]"], output: "a.out"
|
||||
|
||||
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -emit-llvm -S -fopenmp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target=nvptx64-nvidia-cuda -march=sm_52 -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-EMIT-LLVM-IR
|
||||
// CHECK-EMIT-LLVM-IR: "-cc1"{{.*}}"-triple" "nvptx64-nvidia-cuda"{{.*}}"-emit-llvm"
|
||||
|
||||
// RUN: %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target=nvptx64-nvida-cuda -march=sm_70 \
|
||||
// RUN: --libomptarget-nvptx-bc-path=%S/Inputs/libomptarget/libomptarget-new-nvptx-test.bc \
|
||||
// RUN: -nogpulib %s -o openmp-offload-gpu 2>&1 \
|
||||
// RUN: | FileCheck -check-prefix=DRIVER_EMBEDDING %s
|
||||
|
||||
// DRIVER_EMBEDDING: -fembed-offload-object={{.*}}.out
|
||||
|
||||
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp -fopenmp-targets=nvptx64-nvidia-cuda \
|
||||
// RUN: --offload-host-only -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-HOST-ONLY
|
||||
// CHECK-HOST-ONLY: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[INPUT:.*]]"], output: "[[OUTPUT:.*]]"
|
||||
// CHECK-HOST-ONLY: "x86_64-unknown-linux-gnu" - "Offload::Linker", inputs: ["[[OUTPUT]]"], output: "a.out"
|
||||
|
||||
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp -fopenmp-targets=nvptx64-nvidia-cuda \
|
||||
// RUN: --offload-device-only -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-DEVICE-ONLY
|
||||
// CHECK-DEVICE-ONLY: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[INPUT:.*]]"], output: "[[HOST_BC:.*]]"
|
||||
// CHECK-DEVICE-ONLY: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT]]", "[[HOST_BC]]"], output: "[[DEVICE_ASM:.*]]"
|
||||
// CHECK-DEVICE-ONLY: "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[DEVICE_ASM]]"], output: "{{.*}}-openmp-nvptx64-nvidia-cuda.o"
|
||||
|
||||
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp -fopenmp-targets=nvptx64-nvidia-cuda \
|
||||
// RUN: --offload-device-only -E -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-DEVICE-ONLY-PP
|
||||
// CHECK-DEVICE-ONLY-PP: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT:.*]]"], output: "-"
|
||||
|
||||
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fopenmp --offload-arch=sm_52 \
|
||||
// RUN: -foffload-lto %s 2>&1 | FileCheck --check-prefix=CHECK-LTO-LIBRARY %s
|
||||
|
||||
// CHECK-LTO-LIBRARY: {{.*}}-lomptarget{{.*}}-lomptarget.devicertl
|
||||
|
||||
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fopenmp --offload-arch=sm_52 -nogpulib \
|
||||
// RUN: -foffload-lto %s 2>&1 | FileCheck --check-prefix=CHECK-NO-LIBRARY %s
|
||||
|
||||
// CHECK-NO-LIBRARY-NOT: {{.*}}-lomptarget{{.*}}-lomptarget.devicertl
|
||||
|
||||
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fopenmp --offload-arch=sm_52 -nogpulib \
|
||||
// RUN: -Xoffload-linker a -Xoffload-linker-nvptx64-nvidia-cuda b -Xoffload-linker-nvptx64 c \
|
||||
// RUN: %s 2>&1 | FileCheck --check-prefix=CHECK-XLINKER %s
|
||||
|
||||
// CHECK-XLINKER: -device-linker=a{{.*}}-device-linker=nvptx64-nvidia-cuda=b{{.*}}-device-linker=nvptx64-nvidia-cuda=c{{.*}}--
|
||||
|
||||
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fopenmp --offload-arch=sm_52 -nogpulib \
|
||||
// RUN: -foffload-lto %s 2>&1 | FileCheck --check-prefix=CHECK-LTO-FEATURES %s
|
||||
|
||||
// CHECK-LTO-FEATURES: clang-offload-packager{{.*}}--image={{.*}}feature=+ptx{{[0-9]+}}
|
||||
|
||||
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fopenmp --offload-arch=sm_52 -nogpulib \
|
||||
// RUN: -Xopenmp-target=nvptx64-nvidia-cuda --cuda-feature=+ptx64 -foffload-lto %s 2>&1 \
|
||||
// RUN: | FileCheck --check-prefix=CHECK-SET-FEATURES %s
|
||||
|
||||
// CHECK-SET-FEATURES: clang-offload-packager{{.*}}--image={{.*}}feature=+ptx64
|
|
@ -7,100 +7,24 @@
|
|||
// REQUIRES: nvptx-registered-target
|
||||
// REQUIRES: amdgpu-registered-target
|
||||
|
||||
// UNSUPPORTED: aix
|
||||
|
||||
/// ###########################################################################
|
||||
|
||||
/// Check -Xopenmp-target uses one of the archs provided when several archs are used.
|
||||
// RUN: %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda \
|
||||
// RUN: -fno-openmp-new-driver -Xopenmp-target -march=sm_35 -Xopenmp-target -march=sm_60 %s 2>&1 \
|
||||
// RUN: -Xopenmp-target -march=sm_35 -Xopenmp-target -march=sm_60 %s 2>&1 \
|
||||
// RUN: | FileCheck -check-prefix=CHK-FOPENMP-TARGET-ARCHS %s
|
||||
|
||||
// CHK-FOPENMP-TARGET-ARCHS: ptxas{{.*}}" "--gpu-name" "sm_60"
|
||||
// CHK-FOPENMP-TARGET-ARCHS: nvlink{{.*}}" "-arch" "sm_60"
|
||||
|
||||
/// ###########################################################################
|
||||
|
||||
/// Check -Xopenmp-target -march=sm_35 works as expected when two triples are present.
|
||||
// RUN: %clang -### -fopenmp=libomp -fno-openmp-new-driver \
|
||||
// RUN: %clang -### -fopenmp=libomp \
|
||||
// RUN: -fopenmp-targets=powerpc64le-ibm-linux-gnu,nvptx64-nvidia-cuda \
|
||||
// RUN: -Xopenmp-target=nvptx64-nvidia-cuda -march=sm_35 %s 2>&1 \
|
||||
// RUN: | FileCheck -check-prefix=CHK-FOPENMP-TARGET-COMPILATION %s
|
||||
|
||||
// CHK-FOPENMP-TARGET-COMPILATION: ptxas{{.*}}" "--gpu-name" "sm_35"
|
||||
// CHK-FOPENMP-TARGET-COMPILATION: nvlink{{.*}}" "-arch" "sm_35"
|
||||
|
||||
/// ###########################################################################
|
||||
|
||||
/// Check cubin file generation and usage by nvlink
|
||||
// RUN: %clang -### --target=powerpc64le-unknown-linux-gnu -fopenmp=libomp \
|
||||
// RUN: -fno-openmp-new-driver -fopenmp-targets=nvptx64-nvidia-cuda -save-temps %s 2>&1 \
|
||||
// RUN: | FileCheck -check-prefix=CHK-CUBIN-NVLINK %s
|
||||
/// Check cubin file generation and usage by nvlink when toolchain has BindArchAction
|
||||
// RUN: %clang -### --target=x86_64-apple-darwin17.0.0 -fopenmp=libomp \
|
||||
// RUN: -fno-openmp-new-driver -fopenmp-targets=nvptx64-nvidia-cuda %s 2>&1 \
|
||||
// RUN: | FileCheck -check-prefix=CHK-CUBIN-NVLINK %s
|
||||
|
||||
// CHK-CUBIN-NVLINK: clang{{.*}}" {{.*}}"-fopenmp-is-device" {{.*}}"-o" "[[PTX:.*\.s]]"
|
||||
// CHK-CUBIN-NVLINK-NEXT: ptxas{{.*}}" "--output-file" "[[CUBIN:.*\.cubin]]" {{.*}}"[[PTX]]"
|
||||
// CHK-CUBIN-NVLINK-NEXT: nvlink{{.*}}" {{.*}}"[[CUBIN]]"
|
||||
|
||||
/// ###########################################################################
|
||||
|
||||
/// Check unbundlink of assembly file, cubin file generation and usage by nvlink
|
||||
// RUN: touch %t.s
|
||||
// RUN: %clang -### --target=powerpc64le-unknown-linux-gnu -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda \
|
||||
// RUN: -fno-openmp-new-driver -save-temps %t.s 2>&1 \
|
||||
// RUN: | FileCheck -check-prefix=CHK-UNBUNDLING-PTXAS-CUBIN-NVLINK %s
|
||||
|
||||
/// Use DAG to ensure that assembly file has been unbundled.
|
||||
// CHK-UNBUNDLING-PTXAS-CUBIN-NVLINK-DAG: ptxas{{.*}}" "--output-file" "[[CUBIN:.*\.cubin]]" {{.*}}"[[PTX:.*\.s]]"
|
||||
// CHK-UNBUNDLING-PTXAS-CUBIN-NVLINK-DAG: clang-offload-bundler{{.*}}" "-type=s" {{.*}}"-output={{.*}}[[PTX]]
|
||||
// CHK-UNBUNDLING-PTXAS-CUBIN-NVLINK-DAG-SAME: "-unbundle"
|
||||
// CHK-UNBUNDLING-PTXAS-CUBIN-NVLINK: nvlink{{.*}}" {{.*}}"[[CUBIN]]"
|
||||
|
||||
/// ###########################################################################
|
||||
|
||||
/// Check cubin file generation and bundling
|
||||
// RUN: %clang -### --target=powerpc64le-unknown-linux-gnu -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda \
|
||||
// RUN: -fno-openmp-new-driver -save-temps %s -c 2>&1 \
|
||||
// RUN: | FileCheck -check-prefix=CHK-PTXAS-CUBIN-BUNDLING %s
|
||||
|
||||
// CHK-PTXAS-CUBIN-BUNDLING: clang{{.*}}" "-o" "[[PTX:.*\.s]]"
|
||||
// CHK-PTXAS-CUBIN-BUNDLING-NEXT: ptxas{{.*}}" "--output-file" "[[CUBIN:.*\.cubin]]" {{.*}}"[[PTX]]"
|
||||
// CHK-PTXAS-CUBIN-BUNDLING: clang-offload-bundler{{.*}}" "-type=o" {{.*}}"-input={{.*}}[[CUBIN]]
|
||||
|
||||
/// ###########################################################################
|
||||
|
||||
/// Check cubin file unbundling and usage by nvlink
|
||||
// RUN: touch %t.o
|
||||
// RUN: %clang -### --target=powerpc64le-unknown-linux-gnu -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda \
|
||||
// RUN: -fno-openmp-new-driver -save-temps %t.o %S/Inputs/in.so 2>&1 \
|
||||
// RUN: | FileCheck -check-prefix=CHK-CUBIN-UNBUNDLING-NVLINK %s
|
||||
|
||||
/// Use DAG to ensure that cubin file has been unbundled.
|
||||
// CHK-CUBIN-UNBUNDLING-NVLINK-NOT: clang-offload-bundler{{.*}}" "-type=o"{{.*}}in.so
|
||||
// CHK-CUBIN-UNBUNDLING-NVLINK-DAG: nvlink{{.*}}" {{.*}}"[[CUBIN:.*\.cubin]]"
|
||||
// CHK-CUBIN-UNBUNDLING-NVLINK-DAG: clang-offload-bundler{{.*}}" "-type=o" {{.*}}"-output={{.*}}[[CUBIN]]
|
||||
// CHK-CUBIN-UNBUNDLING-NVLINK-DAG-SAME: "-unbundle"
|
||||
// CHK-CUBIN-UNBUNDLING-NVLINK-NOT: clang-offload-bundler{{.*}}" "-type=o"{{.*}}in.so
|
||||
|
||||
/// ###########################################################################
|
||||
|
||||
/// Check cubin file generation and usage by nvlink
|
||||
// RUN: touch %t1.o
|
||||
// RUN: touch %t2.o
|
||||
// RUN: %clang -### --target=powerpc64le-unknown-linux-gnu -fopenmp=libomp \
|
||||
// RUN: -fno-openmp-new-driver -fopenmp-targets=nvptx64-nvidia-cuda %t1.o %t2.o 2>&1 \
|
||||
// RUN: | FileCheck -check-prefix=CHK-TWOCUBIN %s
|
||||
/// Check cubin file generation and usage by nvlink when toolchain has BindArchAction
|
||||
// RUN: %clang -### --target=x86_64-apple-darwin17.0.0 -fopenmp=libomp \
|
||||
// RUN: -fno-openmp-new-driver -fopenmp-targets=nvptx64-nvidia-cuda %t1.o %t2.o 2>&1 \
|
||||
// RUN: | FileCheck -check-prefix=CHK-TWOCUBIN %s
|
||||
|
||||
// CHK-TWOCUBIN: nvlink{{.*}}openmp-offload-{{.*}}.cubin" "{{.*}}openmp-offload-{{.*}}.cubin"
|
||||
|
||||
/// ###########################################################################
|
||||
|
||||
/// Check PTXAS is passed -c flag when offloading to an NVIDIA device using OpenMP.
|
||||
// RUN: %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda %s 2>&1 \
|
||||
|
@ -208,17 +132,17 @@
|
|||
// CHK-CUDA-VERSION-ERROR: NVPTX target requires CUDA 9.2 or above; CUDA 9.0 detected
|
||||
|
||||
/// Check that debug info is emitted in dwarf-2
|
||||
// RUN: %clang -### -fno-openmp-new-driver -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -g -O1 --no-cuda-noopt-device-debug 2>&1 \
|
||||
// RUN: %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -g -O1 --no-cuda-noopt-device-debug 2>&1 \
|
||||
// RUN: | FileCheck -check-prefix=DEBUG_DIRECTIVES %s
|
||||
// RUN: %clang -### -fno-openmp-new-driver -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -g -O3 2>&1 \
|
||||
// RUN: %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -g -O3 2>&1 \
|
||||
// RUN: | FileCheck -check-prefix=DEBUG_DIRECTIVES %s
|
||||
// RUN: %clang -### -fno-openmp-new-driver -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -g -O3 --no-cuda-noopt-device-debug 2>&1 \
|
||||
// RUN: %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -g -O3 --no-cuda-noopt-device-debug 2>&1 \
|
||||
// RUN: | FileCheck -check-prefix=DEBUG_DIRECTIVES %s
|
||||
// RUN: %clang -### -fno-openmp-new-driver -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -g0 2>&1 \
|
||||
// RUN: %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -g0 2>&1 \
|
||||
// RUN: | FileCheck -check-prefix=NO_DEBUG %s
|
||||
// RUN: %clang -### -fno-openmp-new-driver -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -ggdb0 -O3 --cuda-noopt-device-debug 2>&1 \
|
||||
// RUN: %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -ggdb0 -O3 --cuda-noopt-device-debug 2>&1 \
|
||||
// RUN: | FileCheck -check-prefix=NO_DEBUG %s
|
||||
// RUN: %clang -### -fno-openmp-new-driver -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -gline-directives-only 2>&1 \
|
||||
// RUN: %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -gline-directives-only 2>&1 \
|
||||
// RUN: | FileCheck -check-prefix=DEBUG_DIRECTIVES %s
|
||||
|
||||
// DEBUG_DIRECTIVES-NOT: warning: debug
|
||||
|
@ -231,29 +155,26 @@
|
|||
// DEBUG_DIRECTIVES-SAME: "-fopenmp-is-device"
|
||||
// DEBUG_DIRECTIVES: ptxas
|
||||
// DEBUG_DIRECTIVES: "-lineinfo"
|
||||
// NO_DEBUG-NOT: "-g"
|
||||
// NO_DEBUG: nvlink
|
||||
// NO_DEBUG-NOT: "-g"
|
||||
|
||||
// RUN: %clang -### -fno-openmp-new-driver -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -g -O0 --no-cuda-noopt-device-debug 2>&1 \
|
||||
// RUN: %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -g -O0 --no-cuda-noopt-device-debug 2>&1 \
|
||||
// RUN: | FileCheck -check-prefix=HAS_DEBUG %s
|
||||
// RUN: %clang -### -fno-openmp-new-driver -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -g 2>&1 \
|
||||
// RUN: %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -g 2>&1 \
|
||||
// RUN: | FileCheck -check-prefix=HAS_DEBUG %s
|
||||
// RUN: %clang -### -fno-openmp-new-driver -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -g -O0 --cuda-noopt-device-debug 2>&1 \
|
||||
// RUN: %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -g -O0 --cuda-noopt-device-debug 2>&1 \
|
||||
// RUN: | FileCheck -check-prefix=HAS_DEBUG %s
|
||||
// RUN: %clang -### -fno-openmp-new-driver -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -g -O3 --cuda-noopt-device-debug 2>&1 \
|
||||
// RUN: %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -g -O3 --cuda-noopt-device-debug 2>&1 \
|
||||
// RUN: | FileCheck -check-prefix=HAS_DEBUG %s
|
||||
// RUN: %clang -### -fno-openmp-new-driver -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -g2 2>&1 \
|
||||
// RUN: %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -g2 2>&1 \
|
||||
// RUN: | FileCheck -check-prefix=HAS_DEBUG %s
|
||||
// RUN: %clang -### -fno-openmp-new-driver -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -ggdb2 -O0 --cuda-noopt-device-debug 2>&1 \
|
||||
// RUN: %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -ggdb2 -O0 --cuda-noopt-device-debug 2>&1 \
|
||||
// RUN: | FileCheck -check-prefix=HAS_DEBUG %s
|
||||
// RUN: %clang -### -fno-openmp-new-driver -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -g3 -O3 --cuda-noopt-device-debug 2>&1 \
|
||||
// RUN: %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -g3 -O3 --cuda-noopt-device-debug 2>&1 \
|
||||
// RUN: | FileCheck -check-prefix=HAS_DEBUG %s
|
||||
// RUN: %clang -### -fno-openmp-new-driver -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -ggdb3 -O2 --cuda-noopt-device-debug 2>&1 \
|
||||
// RUN: %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -ggdb3 -O2 --cuda-noopt-device-debug 2>&1 \
|
||||
// RUN: | FileCheck -check-prefix=HAS_DEBUG %s
|
||||
// RUN: %clang -### -fno-openmp-new-driver -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -gline-tables-only 2>&1 \
|
||||
// RUN: %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -gline-tables-only 2>&1 \
|
||||
// RUN: | FileCheck -check-prefix=HAS_DEBUG %s
|
||||
// RUN: %clang -### -fno-openmp-new-driver -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -ggdb1 -O2 --cuda-noopt-device-debug 2>&1 \
|
||||
// RUN: %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -ggdb1 -O2 --cuda-noopt-device-debug 2>&1 \
|
||||
// RUN: | FileCheck -check-prefix=HAS_DEBUG %s
|
||||
|
||||
// HAS_DEBUG-NOT: warning: debug
|
||||
|
@ -265,8 +186,6 @@
|
|||
// HAS_DEBUG-SAME: "-g"
|
||||
// HAS_DEBUG-SAME: "--dont-merge-basicblocks"
|
||||
// HAS_DEBUG-SAME: "--return-at-end"
|
||||
// HAS_DEBUG: nvlink
|
||||
// HAS_DEBUG-SAME: "-g"
|
||||
|
||||
// RUN: %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -fopenmp-cuda-mode 2>&1 \
|
||||
// RUN: | FileCheck -check-prefix=CUDA_MODE %s
|
||||
|
@ -330,3 +249,129 @@
|
|||
|
||||
// TRIPLE: "-triple" "nvptx64-nvidia-cuda"
|
||||
// TRIPLE: "-target-cpu" "sm_35"
|
||||
|
||||
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fopenmp -fopenmp-targets=nvptx64-nvidia-cuda \
|
||||
// RUN: -Xopenmp-target=nvptx64-nvidia-cuda -march=sm_52 \
|
||||
// RUN: --libomptarget-nvptx-bc-path=%S/Inputs/libomptarget/libomptarget-nvptx-test.bc %s 2>&1 \
|
||||
// RUN: | FileCheck %s
|
||||
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fopenmp -fopenmp-targets=nvptx64-nvidia-cuda \
|
||||
// RUN: --offload-arch=sm_52 \
|
||||
// RUN: --libomptarget-nvptx-bc-path=%S/Inputs/libomptarget/libomptarget-nvptx-test.bc %s 2>&1 \
|
||||
// RUN: | FileCheck %s
|
||||
|
||||
// verify the tools invocations
|
||||
// CHECK: "-cc1" "-triple" "x86_64-unknown-linux-gnu"{{.*}}"-emit-llvm-bc"{{.*}}"-x" "c"
|
||||
// CHECK: "-cc1" "-triple" "nvptx64-nvidia-cuda" "-aux-triple" "x86_64-unknown-linux-gnu"{{.*}}"-target-cpu" "sm_52"
|
||||
// CHECK: "-cc1" "-triple" "x86_64-unknown-linux-gnu"{{.*}}"-emit-obj"
|
||||
// CHECK: clang-linker-wrapper{{.*}}"--"{{.*}} "-o" "a.out"
|
||||
|
||||
// RUN: %clang -ccc-print-phases --target=x86_64-unknown-linux-gnu -fopenmp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target=nvptx64-nvidia-cuda -march=sm_52 %s 2>&1 \
|
||||
// RUN: | FileCheck --check-prefix=CHECK-PHASES %s
|
||||
// CHECK-PHASES: 0: input, "[[INPUT:.+]]", c, (host-openmp)
|
||||
// CHECK-PHASES: 1: preprocessor, {0}, cpp-output, (host-openmp)
|
||||
// CHECK-PHASES: 2: compiler, {1}, ir, (host-openmp)
|
||||
// CHECK-PHASES: 3: input, "[[INPUT]]", c, (device-openmp)
|
||||
// CHECK-PHASES: 4: preprocessor, {3}, cpp-output, (device-openmp)
|
||||
// CHECK-PHASES: 5: compiler, {4}, ir, (device-openmp)
|
||||
// CHECK-PHASES: 6: offload, "host-openmp (x86_64-unknown-linux-gnu)" {2}, "device-openmp (nvptx64-nvidia-cuda)" {5}, ir
|
||||
// CHECK-PHASES: 7: backend, {6}, assembler, (device-openmp)
|
||||
// CHECK-PHASES: 8: assembler, {7}, object, (device-openmp)
|
||||
// CHECK-PHASES: 9: offload, "device-openmp (nvptx64-nvidia-cuda)" {8}, object
|
||||
// CHECK-PHASES: 10: clang-offload-packager, {9}, image
|
||||
// CHECK-PHASES: 11: offload, "host-openmp (x86_64-unknown-linux-gnu)" {2}, " (x86_64-unknown-linux-gnu)" {10}, ir
|
||||
// CHECK-PHASES: 12: backend, {11}, assembler, (host-openmp)
|
||||
// CHECK-PHASES: 13: assembler, {12}, object, (host-openmp)
|
||||
// CHECK-PHASES: 14: clang-linker-wrapper, {13}, image, (host-openmp)
|
||||
|
||||
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target=nvptx64-nvidia-cuda -march=sm_52 -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-BINDINGS
|
||||
// CHECK-BINDINGS: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[INPUT:.+]]"], output: "[[HOST_BC:.+]]"
|
||||
// CHECK-BINDINGS: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT]]", "[[HOST_BC]]"], output: "[[DEVICE_BC:.+]]"
|
||||
// CHECK-BINDINGS: "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[DEVICE_BC]]"], output: "[[DEVICE_OBJ:.+]]"
|
||||
// CHECK-BINDINGS: "x86_64-unknown-linux-gnu" - "Offload::Packager", inputs: ["[[DEVICE_OBJ]]"], output: "[[BINARY:.+.out]]"
|
||||
// CHECK-BINDINGS: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[HOST_BC]]", "[[BINARY]]"], output: "[[HOST_OBJ:.+]]"
|
||||
// CHECK-BINDINGS: "x86_64-unknown-linux-gnu" - "Offload::Linker", inputs: ["[[HOST_OBJ]]"], output: "a.out"
|
||||
|
||||
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target=nvptx64-nvidia-cuda -march=sm_52 -nogpulib -save-temps %s 2>&1 | FileCheck %s --check-prefix=CHECK-TEMP-BINDINGS
|
||||
// CHECK-TEMP-BINDINGS: "x86_64-unknown-linux-gnu" - "Offload::Packager", inputs: ["[[DEVICE_OBJ:.+]]"], output: "[[BINARY:.+.out]]"
|
||||
|
||||
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp -fopenmp-targets=nvptx64-nvidia-cuda --offload-arch=sm_52 --offload-arch=sm_70 -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-ARCH-BINDINGS
|
||||
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp -fopenmp-targets=nvptx64-nvidia-cuda --offload-arch=sm_52,sm_70 -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-ARCH-BINDINGS
|
||||
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp -fopenmp-targets=nvptx64-nvidia-cuda --offload-arch=sm_52,sm_70,sm_35,sm_80 --no-offload-arch=sm_35,sm_80 -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-ARCH-BINDINGS
|
||||
// CHECK-ARCH-BINDINGS: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[INPUT:.*]]"], output: "[[HOST_BC:.*]]"
|
||||
// CHECK-ARCH-BINDINGS: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT]]", "[[HOST_BC]]"], output: "[[DEVICE_BC_SM_52:.*]]"
|
||||
// CHECK-ARCH-BINDINGS: "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[DEVICE_BC_SM_52]]"], output: "[[DEVICE_OBJ_SM_52:.*]]"
|
||||
// CHECK-ARCH-BINDINGS: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT]]", "[[HOST_BC]]"], output: "[[DEVICE_BC_SM_70:.*]]"
|
||||
// CHECK-ARCH-BINDINGS: "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[DEVICE_BC_SM_70]]"], output: "[[DEVICE_OBJ_SM_70:.*]]"
|
||||
// CHECK-ARCH-BINDINGS: "x86_64-unknown-linux-gnu" - "Offload::Packager", inputs: ["[[DEVICE_OBJ_SM_52]]", "[[DEVICE_OBJ_SM_70]]"], output: "[[BINARY:.*]]"
|
||||
// CHECK-ARCH-BINDINGS: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[HOST_BC]]", "[[BINARY]]"], output: "[[HOST_OBJ:.*]]"
|
||||
// CHECK-ARCH-BINDINGS: "x86_64-unknown-linux-gnu" - "Offload::Linker", inputs: ["[[HOST_OBJ]]"], output: "a.out"
|
||||
|
||||
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp \
|
||||
// RUN: -fopenmp-targets=nvptx64-nvidia-cuda,amdgcn-amd-amdhsa -Xopenmp-target=nvptx64-nvidia-cuda --offload-arch=sm_70 \
|
||||
// RUN: -fopenmp-targets=nvptx64-nvidia-cuda,amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa --offload-arch=gfx908 \
|
||||
// RUN: -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-NVIDIA-AMDGPU
|
||||
|
||||
// CHECK-NVIDIA-AMDGPU: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[INPUT:.+]]"], output: "[[HOST_BC:.+]]"
|
||||
// CHECK-NVIDIA-AMDGPU: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT]]", "[[HOST_BC]]"], output: "[[NVIDIA_PTX:.+]]"
|
||||
// CHECK-NVIDIA-AMDGPU: "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[NVIDIA_PTX]]"], output: "[[NVIDIA_CUBIN:.+]]"
|
||||
// CHECK-NVIDIA-AMDGPU: "amdgcn-amd-amdhsa" - "clang", inputs: ["[[INPUT]]", "[[HOST_BC]]"], output: "[[AMD_BC:.+]]"
|
||||
// CHECK-NVIDIA-AMDGPU: "x86_64-unknown-linux-gnu" - "Offload::Packager", inputs: ["[[NVIDIA_CUBIN]]", "[[AMD_BC]]"], output: "[[BINARY:.*]]"
|
||||
// CHECK-NVIDIA-AMDGPU: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[HOST_BC]]", "[[BINARY]]"], output: "[[HOST_OBJ:.+]]"
|
||||
// CHECK-NVIDIA-AMDGPU: "x86_64-unknown-linux-gnu" - "Offload::Linker", inputs: ["[[HOST_OBJ]]"], output: "a.out"
|
||||
|
||||
// RUN: %clang -x ir -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp --offload-arch=sm_52 -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-IR
|
||||
|
||||
// CHECK-IR: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[INPUT_IR:.+]]"], output: "[[OBJECT:.+]]"
|
||||
// CHECK-IR: "x86_64-unknown-linux-gnu" - "Offload::Linker", inputs: ["[[OBJECT]]"], output: "a.out"
|
||||
|
||||
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -emit-llvm -S -fopenmp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target=nvptx64-nvidia-cuda -march=sm_52 -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-EMIT-LLVM-IR
|
||||
// CHECK-EMIT-LLVM-IR: "-cc1"{{.*}}"-triple" "nvptx64-nvidia-cuda"{{.*}}"-emit-llvm"
|
||||
|
||||
// RUN: %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target=nvptx64-nvida-cuda -march=sm_70 \
|
||||
// RUN: --libomptarget-nvptx-bc-path=%S/Inputs/libomptarget/libomptarget-new-nvptx-test.bc \
|
||||
// RUN: -nogpulib %s -o openmp-offload-gpu 2>&1 \
|
||||
// RUN: | FileCheck -check-prefix=DRIVER_EMBEDDING %s
|
||||
|
||||
// DRIVER_EMBEDDING: -fembed-offload-object={{.*}}.out
|
||||
|
||||
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp -fopenmp-targets=nvptx64-nvidia-cuda \
|
||||
// RUN: --offload-host-only -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-HOST-ONLY
|
||||
// CHECK-HOST-ONLY: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[INPUT:.*]]"], output: "[[OUTPUT:.*]]"
|
||||
// CHECK-HOST-ONLY: "x86_64-unknown-linux-gnu" - "Offload::Linker", inputs: ["[[OUTPUT]]"], output: "a.out"
|
||||
|
||||
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp -fopenmp-targets=nvptx64-nvidia-cuda \
|
||||
// RUN: --offload-device-only -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-DEVICE-ONLY
|
||||
// CHECK-DEVICE-ONLY: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[INPUT:.*]]"], output: "[[HOST_BC:.*]]"
|
||||
// CHECK-DEVICE-ONLY: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT]]", "[[HOST_BC]]"], output: "[[DEVICE_ASM:.*]]"
|
||||
// CHECK-DEVICE-ONLY: "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[DEVICE_ASM]]"], output: "{{.*}}-openmp-nvptx64-nvidia-cuda.o"
|
||||
|
||||
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp -fopenmp-targets=nvptx64-nvidia-cuda \
|
||||
// RUN: --offload-device-only -E -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-DEVICE-ONLY-PP
|
||||
// CHECK-DEVICE-ONLY-PP: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT:.*]]"], output: "-"
|
||||
|
||||
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fopenmp --offload-arch=sm_52 \
|
||||
// RUN: -foffload-lto %s 2>&1 | FileCheck --check-prefix=CHECK-LTO-LIBRARY %s
|
||||
|
||||
// CHECK-LTO-LIBRARY: {{.*}}-lomptarget{{.*}}-lomptarget.devicertl
|
||||
|
||||
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fopenmp --offload-arch=sm_52 -nogpulib \
|
||||
// RUN: -foffload-lto %s 2>&1 | FileCheck --check-prefix=CHECK-NO-LIBRARY %s
|
||||
|
||||
// CHECK-NO-LIBRARY-NOT: {{.*}}-lomptarget{{.*}}-lomptarget.devicertl
|
||||
|
||||
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fopenmp --offload-arch=sm_52 -nogpulib \
|
||||
// RUN: -Xoffload-linker a -Xoffload-linker-nvptx64-nvidia-cuda b -Xoffload-linker-nvptx64 c \
|
||||
// RUN: %s 2>&1 | FileCheck --check-prefix=CHECK-XLINKER %s
|
||||
|
||||
// CHECK-XLINKER: -device-linker=a{{.*}}-device-linker=nvptx64-nvidia-cuda=b{{.*}}-device-linker=nvptx64-nvidia-cuda=c{{.*}}--
|
||||
|
||||
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fopenmp --offload-arch=sm_52 -nogpulib \
|
||||
// RUN: -foffload-lto %s 2>&1 | FileCheck --check-prefix=CHECK-LTO-FEATURES %s
|
||||
|
||||
// CHECK-LTO-FEATURES: clang-offload-packager{{.*}}--image={{.*}}feature=+ptx{{[0-9]+}}
|
||||
|
||||
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fopenmp --offload-arch=sm_52 -nogpulib \
|
||||
// RUN: -Xopenmp-target=nvptx64-nvidia-cuda --cuda-feature=+ptx64 -foffload-lto %s 2>&1 \
|
||||
// RUN: | FileCheck --check-prefix=CHECK-SET-FEATURES %s
|
||||
|
||||
// CHECK-SET-FEATURES: clang-offload-packager{{.*}}--image={{.*}}feature=+ptx64
|
||||
|
|
|
@ -98,558 +98,73 @@
|
|||
/// We should have an offload action joining the host compile and device
|
||||
/// preprocessor and another one joining the device linking outputs to the host
|
||||
/// action.
|
||||
// RUN: %clang -ccc-print-phases -fopenmp=libomp -fno-openmp-new-driver --target=powerpc64le-ibm-linux-gnu -fopenmp-targets=x86_64-pc-linux-gnu %s 2>&1 \
|
||||
// RUN: | FileCheck -check-prefix=CHK-PHASES %s
|
||||
// CHK-PHASES: 0: input, "[[INPUT:.+\.c]]", c, (host-openmp)
|
||||
// CHK-PHASES: 1: preprocessor, {0}, cpp-output, (host-openmp)
|
||||
// CHK-PHASES: 2: compiler, {1}, ir, (host-openmp)
|
||||
// CHK-PHASES: 3: backend, {2}, assembler, (host-openmp)
|
||||
// CHK-PHASES: 4: assembler, {3}, object, (host-openmp)
|
||||
// CHK-PHASES: 5: input, "[[INPUT]]", c, (device-openmp)
|
||||
// CHK-PHASES: 6: preprocessor, {5}, cpp-output, (device-openmp)
|
||||
// CHK-PHASES: 7: compiler, {6}, ir, (device-openmp)
|
||||
// CHK-PHASES: 8: offload, "host-openmp (powerpc64le-ibm-linux-gnu)" {2}, "device-openmp (x86_64-pc-linux-gnu)" {7}, ir
|
||||
// CHK-PHASES: 9: backend, {8}, assembler, (device-openmp)
|
||||
// CHK-PHASES: 10: assembler, {9}, object, (device-openmp)
|
||||
// CHK-PHASES: 11: linker, {10}, image, (device-openmp)
|
||||
// CHK-PHASES: 12: offload, "device-openmp (x86_64-pc-linux-gnu)" {11}, image
|
||||
// CHK-PHASES: 13: clang-offload-wrapper, {12}, ir, (host-openmp)
|
||||
// CHK-PHASES: 14: backend, {13}, assembler, (host-openmp)
|
||||
// CHK-PHASES: 15: assembler, {14}, object, (host-openmp)
|
||||
// CHK-PHASES: 16: linker, {4, 15}, image, (host-openmp)
|
||||
|
||||
/// ###########################################################################
|
||||
|
||||
/// Check the phases when using multiple targets. Here we also add a library to
|
||||
/// make sure it is treated as input by the device.
|
||||
// RUN: %clang -ccc-print-phases -lsomelib -fopenmp=libomp -fno-openmp-new-driver --target=powerpc64-ibm-linux-gnu -fopenmp-targets=x86_64-pc-linux-gnu,powerpc64-ibm-linux-gnu %s 2>&1 \
|
||||
// RUN: | FileCheck -check-prefix=CHK-PHASES-LIB %s
|
||||
// CHK-PHASES-LIB: 0: input, "somelib", object, (host-openmp)
|
||||
// CHK-PHASES-LIB: 1: input, "[[INPUT:.+\.c]]", c, (host-openmp)
|
||||
// CHK-PHASES-LIB: 2: preprocessor, {1}, cpp-output, (host-openmp)
|
||||
// CHK-PHASES-LIB: 3: compiler, {2}, ir, (host-openmp)
|
||||
// CHK-PHASES-LIB: 4: backend, {3}, assembler, (host-openmp)
|
||||
// CHK-PHASES-LIB: 5: assembler, {4}, object, (host-openmp)
|
||||
// CHK-PHASES-LIB: 6: input, "somelib", object, (device-openmp)
|
||||
// CHK-PHASES-LIB: 7: input, "[[INPUT]]", c, (device-openmp)
|
||||
// CHK-PHASES-LIB: 8: preprocessor, {7}, cpp-output, (device-openmp)
|
||||
// CHK-PHASES-LIB: 9: compiler, {8}, ir, (device-openmp)
|
||||
// CHK-PHASES-LIB: 10: offload, "host-openmp (powerpc64-ibm-linux-gnu)" {3}, "device-openmp (x86_64-pc-linux-gnu)" {9}, ir
|
||||
// CHK-PHASES-LIB: 11: backend, {10}, assembler, (device-openmp)
|
||||
// CHK-PHASES-LIB: 12: assembler, {11}, object, (device-openmp)
|
||||
// CHK-PHASES-LIB: 13: linker, {6, 12}, image, (device-openmp)
|
||||
// CHK-PHASES-LIB: 14: offload, "device-openmp (x86_64-pc-linux-gnu)" {13}, image
|
||||
// CHK-PHASES-LIB: 15: input, "somelib", object, (device-openmp)
|
||||
// CHK-PHASES-LIB: 16: input, "[[INPUT]]", c, (device-openmp)
|
||||
// CHK-PHASES-LIB: 17: preprocessor, {16}, cpp-output, (device-openmp)
|
||||
// CHK-PHASES-LIB: 18: compiler, {17}, ir, (device-openmp)
|
||||
// CHK-PHASES-LIB: 19: offload, "host-openmp (powerpc64-ibm-linux-gnu)" {3}, "device-openmp (powerpc64-ibm-linux-gnu)" {18}, ir
|
||||
// CHK-PHASES-LIB: 20: backend, {19}, assembler, (device-openmp)
|
||||
// CHK-PHASES-LIB: 21: assembler, {20}, object, (device-openmp)
|
||||
// CHK-PHASES-LIB: 22: linker, {15, 21}, image, (device-openmp)
|
||||
// CHK-PHASES-LIB: 23: offload, "device-openmp (powerpc64-ibm-linux-gnu)" {22}, image
|
||||
// CHK-PHASES-LIB: 24: clang-offload-wrapper, {14, 23}, ir, (host-openmp)
|
||||
// CHK-PHASES-LIB: 25: backend, {24}, assembler, (host-openmp)
|
||||
// CHK-PHASES-LIB: 26: assembler, {25}, object, (host-openmp)
|
||||
// CHK-PHASES-LIB: 27: linker, {0, 5, 26}, image, (host-openmp)
|
||||
// RUN: %clang -ccc-print-phases -fopenmp=libomp --target=powerpc64-ibm-linux-gnu \
|
||||
// RUN: -fopenmp-targets=powerpc64-ibm-linux-gnu %s 2>&1 | FileCheck -check-prefix=CHK-PHASES %s
|
||||
// CHK-PHASES: 0: input, "[[INPUT:.+]]", c, (host-openmp)
|
||||
// CHK-PHASES-NEXT: 1: preprocessor, {0}, cpp-output, (host-openmp)
|
||||
// CHK-PHASES-NEXT: 2: compiler, {1}, ir, (host-openmp)
|
||||
// CHK-PHASES-NEXT: 3: input, "[[INPUT]]", c, (device-openmp)
|
||||
// CHK-PHASES-NEXT: 4: preprocessor, {3}, cpp-output, (device-openmp)
|
||||
// CHK-PHASES-NEXT: 5: compiler, {4}, ir, (device-openmp)
|
||||
// CHK-PHASES-NEXT: 6: offload, "host-openmp (powerpc64-ibm-linux-gnu)" {2}, "device-openmp (powerpc64-ibm-linux-gnu)" {5}, ir
|
||||
// CHK-PHASES-NEXT: 7: backend, {6}, assembler, (device-openmp)
|
||||
// CHK-PHASES-NEXT: 8: assembler, {7}, object, (device-openmp)
|
||||
// CHK-PHASES-NEXT: 9: offload, "device-openmp (powerpc64-ibm-linux-gnu)" {8}, object
|
||||
// CHK-PHASES-NEXT: 10: clang-offload-packager, {9}, image
|
||||
// CHK-PHASES-NEXT: 11: offload, "host-openmp (powerpc64-ibm-linux-gnu)" {2}, " (powerpc64-ibm-linux-gnu)" {10}, ir
|
||||
// CHK-PHASES-NEXT: 12: backend, {11}, assembler, (host-openmp)
|
||||
// CHK-PHASES-NEXT: 13: assembler, {12}, object, (host-openmp)
|
||||
// CHK-PHASES-NEXT: 14: clang-linker-wrapper, {13}, image, (host-openmp)
|
||||
|
||||
/// ###########################################################################
|
||||
|
||||
/// Check the phases when using multiple targets and multiple source files
|
||||
// RUN: echo " " > %t.c
|
||||
// RUN: %clang -ccc-print-phases -lsomelib -fopenmp=libomp -fno-openmp-new-driver --target=powerpc64-ibm-linux-gnu -fopenmp-targets=x86_64-pc-linux-gnu,powerpc64-ibm-linux-gnu %s %t.c 2>&1 \
|
||||
// RUN: | FileCheck -check-prefix=CHK-PHASES-FILES %s
|
||||
// CHK-PHASES-FILES: 0: input, "somelib", object, (host-openmp)
|
||||
// CHK-PHASES-FILES: 1: input, "[[INPUT1:.+\.c]]", c, (host-openmp)
|
||||
// CHK-PHASES-FILES: 2: preprocessor, {1}, cpp-output, (host-openmp)
|
||||
// CHK-PHASES-FILES: 3: compiler, {2}, ir, (host-openmp)
|
||||
// CHK-PHASES-FILES: 4: backend, {3}, assembler, (host-openmp)
|
||||
// CHK-PHASES-FILES: 5: assembler, {4}, object, (host-openmp)
|
||||
// CHK-PHASES-FILES: 6: input, "[[INPUT2:.+\.c]]", c, (host-openmp)
|
||||
// CHK-PHASES-FILES: 7: preprocessor, {6}, cpp-output, (host-openmp)
|
||||
// CHK-PHASES-FILES: 8: compiler, {7}, ir, (host-openmp)
|
||||
// CHK-PHASES-FILES: 9: backend, {8}, assembler, (host-openmp)
|
||||
// CHK-PHASES-FILES: 10: assembler, {9}, object, (host-openmp)
|
||||
// CHK-PHASES-FILES: 11: input, "somelib", object, (device-openmp)
|
||||
// CHK-PHASES-FILES: 12: input, "[[INPUT1]]", c, (device-openmp)
|
||||
// CHK-PHASES-FILES: 13: preprocessor, {12}, cpp-output, (device-openmp)
|
||||
// CHK-PHASES-FILES: 14: compiler, {13}, ir, (device-openmp)
|
||||
// CHK-PHASES-FILES: 15: offload, "host-openmp (powerpc64-ibm-linux-gnu)" {3}, "device-openmp (x86_64-pc-linux-gnu)" {14}, ir
|
||||
// CHK-PHASES-FILES: 16: backend, {15}, assembler, (device-openmp)
|
||||
// CHK-PHASES-FILES: 17: assembler, {16}, object, (device-openmp)
|
||||
// CHK-PHASES-FILES: 18: input, "[[INPUT2]]", c, (device-openmp)
|
||||
// CHK-PHASES-FILES: 19: preprocessor, {18}, cpp-output, (device-openmp)
|
||||
// CHK-PHASES-FILES: 20: compiler, {19}, ir, (device-openmp)
|
||||
// CHK-PHASES-FILES: 21: offload, "host-openmp (powerpc64-ibm-linux-gnu)" {8}, "device-openmp (x86_64-pc-linux-gnu)" {20}, ir
|
||||
// CHK-PHASES-FILES: 22: backend, {21}, assembler, (device-openmp)
|
||||
// CHK-PHASES-FILES: 23: assembler, {22}, object, (device-openmp)
|
||||
// CHK-PHASES-FILES: 24: linker, {11, 17, 23}, image, (device-openmp)
|
||||
// CHK-PHASES-FILES: 25: offload, "device-openmp (x86_64-pc-linux-gnu)" {24}, image
|
||||
// CHK-PHASES-FILES: 26: input, "somelib", object, (device-openmp)
|
||||
// CHK-PHASES-FILES: 27: input, "[[INPUT1]]", c, (device-openmp)
|
||||
// CHK-PHASES-FILES: 28: preprocessor, {27}, cpp-output, (device-openmp)
|
||||
// CHK-PHASES-FILES: 29: compiler, {28}, ir, (device-openmp)
|
||||
// CHK-PHASES-FILES: 30: offload, "host-openmp (powerpc64-ibm-linux-gnu)" {3}, "device-openmp (powerpc64-ibm-linux-gnu)" {29}, ir
|
||||
// CHK-PHASES-FILES: 31: backend, {30}, assembler, (device-openmp)
|
||||
// CHK-PHASES-FILES: 32: assembler, {31}, object, (device-openmp)
|
||||
// CHK-PHASES-FILES: 33: input, "[[INPUT2]]", c, (device-openmp)
|
||||
// CHK-PHASES-FILES: 34: preprocessor, {33}, cpp-output, (device-openmp)
|
||||
// CHK-PHASES-FILES: 35: compiler, {34}, ir, (device-openmp)
|
||||
// CHK-PHASES-FILES: 36: offload, "host-openmp (powerpc64-ibm-linux-gnu)" {8}, "device-openmp (powerpc64-ibm-linux-gnu)" {35}, ir
|
||||
// CHK-PHASES-FILES: 37: backend, {36}, assembler, (device-openmp)
|
||||
// CHK-PHASES-FILES: 38: assembler, {37}, object, (device-openmp)
|
||||
// CHK-PHASES-FILES: 39: linker, {26, 32, 38}, image, (device-openmp)
|
||||
// CHK-PHASES-FILES: 40: offload, "device-openmp (powerpc64-ibm-linux-gnu)" {39}, image
|
||||
// CHK-PHASES-FILES: 41: clang-offload-wrapper, {25, 40}, ir, (host-openmp)
|
||||
// CHK-PHASES-FILES: 42: backend, {41}, assembler, (host-openmp)
|
||||
// CHK-PHASES-FILES: 43: assembler, {42}, object, (host-openmp)
|
||||
// CHK-PHASES-FILES: 44: linker, {0, 5, 10, 43}, image, (host-openmp)
|
||||
|
||||
/// ###########################################################################
|
||||
|
||||
/// Check the phases graph when using a single GPU target, and check the OpenMP
|
||||
/// and CUDA phases are articulated correctly.
|
||||
// RUN: %clang -ccc-print-phases -fopenmp=libomp -fno-openmp-new-driver --target=powerpc64le-ibm-linux-gnu -fopenmp-targets=nvptx64-nvidia-cuda -x cuda %s 2>&1 \
|
||||
// RUN: | FileCheck -check-prefix=CHK-PHASES-WITH-CUDA %s
|
||||
// CHK-PHASES-WITH-CUDA: 0: input, "[[INPUT:.+\.c]]", cuda, (host-cuda-openmp)
|
||||
// CHK-PHASES-WITH-CUDA: 1: preprocessor, {0}, cuda-cpp-output, (host-cuda-openmp)
|
||||
// CHK-PHASES-WITH-CUDA: 2: compiler, {1}, ir, (host-cuda-openmp)
|
||||
// CHK-PHASES-WITH-CUDA: 3: input, "[[INPUT]]", cuda, (device-cuda, sm_{{.*}})
|
||||
// CHK-PHASES-WITH-CUDA: 4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_{{.*}})
|
||||
// CHK-PHASES-WITH-CUDA: 5: compiler, {4}, ir, (device-cuda, sm_{{.*}})
|
||||
// CHK-PHASES-WITH-CUDA: 6: backend, {5}, assembler, (device-cuda, sm_{{.*}})
|
||||
// CHK-PHASES-WITH-CUDA: 7: assembler, {6}, object, (device-cuda, sm_{{.*}})
|
||||
// CHK-PHASES-WITH-CUDA: 8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_{{.*}})" {7}, object
|
||||
// CHK-PHASES-WITH-CUDA: 9: offload, "device-cuda (nvptx64-nvidia-cuda:sm_{{.*}})" {6}, assembler
|
||||
// CHK-PHASES-WITH-CUDA: 10: linker, {8, 9}, cuda-fatbin, (device-cuda)
|
||||
// CHK-PHASES-WITH-CUDA: 11: offload, "host-cuda-openmp (powerpc64le-ibm-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda)" {10}, ir
|
||||
// CHK-PHASES-WITH-CUDA: 12: backend, {11}, assembler, (host-cuda-openmp)
|
||||
// CHK-PHASES-WITH-CUDA: 13: assembler, {12}, object, (host-cuda-openmp)
|
||||
// CHK-PHASES-WITH-CUDA: 14: input, "[[INPUT]]", cuda, (device-openmp)
|
||||
// CHK-PHASES-WITH-CUDA: 15: preprocessor, {14}, cuda-cpp-output, (device-openmp)
|
||||
// CHK-PHASES-WITH-CUDA: 16: compiler, {15}, ir, (device-openmp)
|
||||
// CHK-PHASES-WITH-CUDA: 17: offload, "host-cuda-openmp (powerpc64le-ibm-linux-gnu)" {2}, "device-openmp (nvptx64-nvidia-cuda)" {16}, ir
|
||||
// CHK-PHASES-WITH-CUDA: 18: backend, {17}, assembler, (device-openmp)
|
||||
// CHK-PHASES-WITH-CUDA: 19: assembler, {18}, object, (device-openmp)
|
||||
// CHK-PHASES-WITH-CUDA: 20: linker, {19}, image, (device-openmp)
|
||||
// CHK-PHASES-WITH-CUDA: 21: offload, "device-openmp (nvptx64-nvidia-cuda)" {20}, image
|
||||
// CHK-PHASES-WITH-CUDA: 22: clang-offload-wrapper, {21}, ir, (host-openmp)
|
||||
// CHK-PHASES-WITH-CUDA: 23: backend, {22}, assembler, (host-openmp)
|
||||
// CHK-PHASES-WITH-CUDA: 24: assembler, {23}, object, (host-openmp)
|
||||
// CHK-PHASES-WITH-CUDA: 25: linker, {13, 24}, image, (host-cuda-openmp)
|
||||
|
||||
/// ###########################################################################
|
||||
|
||||
/// Check of the commands passed to each tool when using valid OpenMP targets.
|
||||
/// Here we also check that offloading does not break the use of integrated
|
||||
/// assembler. It does however preclude the merge of the host compile and
|
||||
/// backend phases. There are also two offloading specific options:
|
||||
/// -fopenmp-is-device: will tell the frontend that it will generate code for a
|
||||
/// target.
|
||||
/// -fopenmp-host-ir-file-path: specifies the host IR file that can be loaded by
|
||||
/// the target code generation to gather information about which declaration
|
||||
/// really need to be emitted.
|
||||
///
|
||||
// RUN: %clang -### -fopenmp=libomp -fno-openmp-new-driver -o %t.out --target=powerpc64le-linux -fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu %s 2>&1 \
|
||||
// RUN: | FileCheck -check-prefix=CHK-COMMANDS %s
|
||||
// RUN: %clang -### -fopenmp=libomp -fno-openmp-new-driver -o %t.out --target=powerpc64le-linux -fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu %s -save-temps 2>&1 \
|
||||
// RUN: | FileCheck -check-prefix=CHK-COMMANDS-ST %s
|
||||
|
||||
//
|
||||
// Generate host BC file and host object.
|
||||
//
|
||||
// CHK-COMMANDS: "-cc1" "-triple" "powerpc64le-unknown-linux" {{.*}}"-emit-llvm-bc" {{.*}}"-disable-llvm-passes"
|
||||
// CHK-COMMANDS-SAME: "-fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu"
|
||||
// CHK-COMMANDS-SAME: "-o" "
|
||||
// CHK-COMMANDS-SAME: [[HOSTBC:[^\\/]+\.bc]]" "-x" "c" "
|
||||
// CHK-COMMANDS-SAME: [[INPUT:[^\\/]+\.c]]"
|
||||
// CHK-COMMANDS: "-cc1" "-triple" "powerpc64le-unknown-linux" {{.*}}"-emit-obj" {{.*}}"-fopenmp" {{.*}}"-o" "
|
||||
// CHK-COMMANDS-SAME: [[HOSTOBJ:[^\\/]+\.o]]" "-x" "ir" "{{.*}}[[HOSTBC]]"
|
||||
// CHK-COMMANDS-ST: "-cc1" "-triple" "powerpc64le-unknown-linux" {{.*}}"-E" {{.*}}"-fopenmp" {{.*}}"-o" "
|
||||
// CHK-COMMANDS-ST-SAME: [[HOSTPP:[^\\/]+\.i]]" "-x" "c" "
|
||||
// CHK-COMMANDS-ST-SAME: [[INPUT:[^\\/]+\.c]]"
|
||||
// CHK-COMMANDS-ST: "-cc1" "-triple" "powerpc64le-unknown-linux" {{.*}}"-emit-llvm-bc" {{.*}}"-fopenmp" {{.*}}"-disable-llvm-passes" {{.*}}"-fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu" {{.*}}"-o" "
|
||||
// CHK-COMMANDS-ST-SAME: [[HOSTBC:[^\\/]+\.bc]]" "-x" "cpp-output" "{{.*}}[[HOSTPP]]"
|
||||
// CHK-COMMANDS-ST: "-cc1" "-triple" "powerpc64le-unknown-linux" {{.*}}"-S" {{.*}}"-fopenmp" {{.*}}"-o" "
|
||||
// CHK-COMMANDS-ST-SAME: [[HOSTASM:[^\\/]+\.s]]" "-x" "ir" "{{.*}}[[HOSTBC]]"
|
||||
// CHK-COMMANDS-ST: clang{{.*}}" "-cc1as" "-triple" "powerpc64le-unknown-linux" "-filetype" "obj" {{.*}}"-o" "
|
||||
// CHK-COMMANDS-ST-SAME: [[HOSTOBJ:[^\\/]+\.o]]" "{{.*}}[[HOSTASM]]"
|
||||
|
||||
//
|
||||
// Compile for the powerpc device.
|
||||
//
|
||||
// CHK-COMMANDS: "-cc1" "-triple" "powerpc64le-ibm-linux-gnu" "-aux-triple" "powerpc64le-unknown-linux" {{.*}}"-emit-obj" {{.*}}"-pic-level" "2" {{.*}}"-fopenmp"
|
||||
// CHK-COMMANDS-SAME: "-fopenmp-is-device" "-fopenmp-host-ir-file-path" "{{.*}}[[HOSTBC]]" {{.*}}"-o" "
|
||||
// CHK-COMMANDS-SAME: [[T1OBJ:[^\\/]+\.o]]" "-x" "c" "{{.*}}[[INPUT]]"
|
||||
// CHK-COMMANDS: ld{{(\.exe)?}}" {{.*}}"-o" "
|
||||
// CHK-COMMANDS-SAME: [[T1BIN:[^\\/]+\.out]]" {{.*}}"{{.*}}[[T1OBJ]]"
|
||||
// CHK-COMMANDS-ST: "-cc1" "-triple" "powerpc64le-ibm-linux-gnu" "-aux-triple" "powerpc64le-unknown-linux" {{.*}}"-E" {{.*}}"-fopenmp" {{.*}}"-o" "
|
||||
// CHK-COMMANDS-ST-SAME: [[T1PP:[^\\/]+\.i]]" "-x" "c" "{{.*}}[[INPUT]]"
|
||||
// CHK-COMMANDS-ST: "-cc1" "-triple" "powerpc64le-ibm-linux-gnu" "-aux-triple" "powerpc64le-unknown-linux" {{.*}}"-emit-llvm-bc" {{.*}}"-pic-level" "2" {{.*}}"-fopenmp" {{.*}}"-fopenmp-is-device" "-fopenmp-host-ir-file-path" "{{.*}}[[HOSTBC]]" {{.*}}"-o" "
|
||||
// CHK-COMMANDS-ST-SAME: [[T1BC:[^\\/]+\.bc]]" "-x" "cpp-output" "{{.*}}[[T1PP]]"
|
||||
// CHK-COMMANDS-ST: "-cc1" "-triple" "powerpc64le-ibm-linux-gnu" "-aux-triple" "powerpc64le-unknown-linux" {{.*}}"-S" {{.*}}"-fopenmp" {{.*}}"-o" "
|
||||
// CHK-COMMANDS-ST-SAME: [[T1ASM:[^\\/]+\.s]]" "-x" "ir" "{{.*}}[[T1BC]]"
|
||||
// CHK-COMMANDS-ST: "-cc1as" "-triple" "powerpc64le-ibm-linux-gnu" "-filetype" "obj" {{.*}}"-o" "
|
||||
// CHK-COMMANDS-ST-SAME: [[T1OBJ:[^\\/]+\.o]]" "{{.*}}[[T1ASM]]"
|
||||
// CHK-COMMANDS-ST: ld{{(\.exe)?}}" {{.*}}"-shared" {{.*}}"-o" "
|
||||
// CHK-COMMANDS-ST-SAME: [[T1BIN:[^\\/]+\.out-openmp-powerpc64le-ibm-linux-gnu]]" {{.*}}"{{.*}}[[T1OBJ]]"
|
||||
//
|
||||
// Compile for the x86 device.
|
||||
//
|
||||
// CHK-COMMANDS: "-cc1" "-triple" "x86_64-pc-linux-gnu" "-aux-triple" "powerpc64le-unknown-linux" {{.*}}"-emit-obj" {{.*}}"-pic-level" "2" {{.*}}"-fopenmp"
|
||||
// CHK-COMMANDS-SAME: "-fopenmp-is-device" "-fopenmp-host-ir-file-path" "{{.*}}[[HOSTBC]]" {{.*}}"-o" "
|
||||
// CHK-COMMANDS-SAME: [[T2OBJ:[^\\/]+\.o]]" "-x" "c" "{{.*}}[[INPUT]]"
|
||||
// CHK-COMMANDS: ld{{(\.exe)?}}" {{.*}}"-o" "
|
||||
// CHK-COMMANDS-SAME: [[T2BIN:[^\\/]+\.out]]" {{.*}}"{{.*}}[[T2OBJ]]"
|
||||
// CHK-COMMANDS-ST: "-cc1" "-triple" "x86_64-pc-linux-gnu" "-aux-triple" "powerpc64le-unknown-linux" {{.*}}"-E" {{.*}}"-fopenmp" {{.*}}"-o" "
|
||||
// CHK-COMMANDS-ST-SAME: [[T2PP:[^\\/]+\.i]]" "-x" "c" "{{.*}}[[INPUT]]"
|
||||
// CHK-COMMANDS-ST: "-cc1" "-triple" "x86_64-pc-linux-gnu" "-aux-triple" "powerpc64le-unknown-linux" {{.*}}"-emit-llvm-bc" {{.*}}"-pic-level" "2" {{.*}}"-fopenmp" {{.*}}"-fopenmp-is-device" "-fopenmp-host-ir-file-path" "{{.*}}[[HOSTBC]]" {{.*}}"-o" "
|
||||
// CHK-COMMANDS-ST-SAME: [[T2BC:[^\\/]+\.bc]]" "-x" "cpp-output" "{{.*}}[[T2PP]]"
|
||||
// CHK-COMMANDS-ST: "-cc1" "-triple" "x86_64-pc-linux-gnu" "-aux-triple" "powerpc64le-unknown-linux" {{.*}}"-S" {{.*}}"-fopenmp" {{.*}}"-o" "
|
||||
// CHK-COMMANDS-ST-SAME: [[T2ASM:[^\\/]+\.s]]" "-x" "ir" "{{.*}}[[T2BC]]"
|
||||
// CHK-COMMANDS-ST: "-cc1as" "-triple" "x86_64-pc-linux-gnu" "-filetype" "obj" {{.*}}"-o" "
|
||||
// CHK-COMMANDS-ST-SAME: [[T2OBJ:[^\\/]+\.o]]" "{{.*}}[[T2ASM]]"
|
||||
// CHK-COMMANDS-ST: ld{{(\.exe)?}}" {{.*}}"-shared" {{.*}}"-o" "
|
||||
// CHK-COMMANDS-ST-SAME: [[T2BIN:[^\\/]+\.out-openmp-x86_64-pc-linux-gnu]]" {{.*}}"{{.*}}[[T2OBJ]]"
|
||||
|
||||
//
|
||||
// Create wrapper BC file and wrapper object.
|
||||
//
|
||||
// CHK-COMMANDS: clang-offload-wrapper{{(\.exe)?}}" "-target" "powerpc64le-unknown-linux" {{.*}}"-o" "
|
||||
// CHK-COMMANDS-SAME: [[WRAPPERBC:[^\\/]+\.bc]]" "{{.*}}[[T1BIN]]" "{{.*}}[[T2BIN]]"
|
||||
// CHK-COMMANDS: "-cc1" "-triple" "powerpc64le-unknown-linux" {{.*}}"-emit-obj" {{.*}}"-fopenmp" {{.*}}"-o" "
|
||||
// CHK-COMMANDS-SAME: [[WRAPPEROBJ:[^\\/]+\.o]]" "-x" "ir" "{{.*}}[[WRAPPERBC]]"
|
||||
// CHK-COMMANDS-ST: clang-offload-wrapper{{(\.exe)?}}" "-target" "powerpc64le-unknown-linux" {{.*}}"-o" "
|
||||
// CHK-COMMANDS-ST-SAME: [[WRAPPERBC:[^\\/]+\.bc]]" "{{.*}}[[T1BIN]]" "{{.*}}[[T2BIN]]"
|
||||
// CHK-COMMANDS-ST: "-cc1" "-triple" "powerpc64le-unknown-linux" {{.*}}"-S" {{.*}}"-fopenmp" {{.*}}"-o" "
|
||||
// CHK-COMMANDS-ST-SAME: [[WRAPPERASM:[^\\/]+\.s]]" "-x" "ir" "{{.*}}[[WRAPPERBC]]"
|
||||
// CHK-COMMANDS-ST: "-cc1as" "-triple" "powerpc64le-unknown-linux" "-filetype" "obj" {{.*}}"-o" "
|
||||
// CHK-COMMANDS-ST-SAME: [[WRAPPEROBJ:[^\\/]+\.o]]" "{{.*}}[[WRAPPERASM]]"
|
||||
|
||||
//
|
||||
// Link host binary.
|
||||
//
|
||||
// CHK-COMMANDS: ld{{(\.exe)?}}" {{.*}}"-o" "
|
||||
// CHK-COMMANDS-SAME: [[HOSTBIN:[^\\/]+\.out]]" {{.*}}"{{.*}}[[HOSTOBJ]]" "{{.*}}[[WRAPPEROBJ]]" {{.*}}"-lomptarget"
|
||||
// CHK-COMMANDS-ST: ld{{(\.exe)?}}" {{.*}}"-o" "
|
||||
// CHK-COMMANDS-ST-SAME: [[HOSTBIN:[^\\/]+\.out]]" {{.*}}"{{.*}}[[HOSTOBJ]]" "{{.*}}[[WRAPPEROBJ]]" {{.*}}"-lomptarget"
|
||||
|
||||
/// ###########################################################################
|
||||
|
||||
/// Check separate compilation with offloading - bundling actions
|
||||
// RUN: %clang -### -ccc-print-phases -fopenmp=libomp -fno-openmp-new-driver -c %S/Input/in.so -lsomelib --target=powerpc64le-linux -fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu %s 2>&1 \
|
||||
// RUN: | FileCheck -check-prefix=CHK-BUACTIONS %s
|
||||
|
||||
// CHK-BUACTIONS: 0: input, "[[INPUT:.+\.c]]", c, (host-openmp)
|
||||
// CHK-BUACTIONS: 1: preprocessor, {0}, cpp-output, (host-openmp)
|
||||
// CHK-BUACTIONS: 2: compiler, {1}, ir, (host-openmp)
|
||||
// CHK-BUACTIONS: 3: input, "[[INPUT]]", c, (device-openmp)
|
||||
// CHK-BUACTIONS: 4: preprocessor, {3}, cpp-output, (device-openmp)
|
||||
// CHK-BUACTIONS: 5: compiler, {4}, ir, (device-openmp)
|
||||
// CHK-BUACTIONS: 6: offload, "host-openmp (powerpc64le-unknown-linux)" {2}, "device-openmp (powerpc64le-ibm-linux-gnu)" {5}, ir
|
||||
// CHK-BUACTIONS: 7: backend, {6}, assembler, (device-openmp)
|
||||
// CHK-BUACTIONS: 8: assembler, {7}, object, (device-openmp)
|
||||
// CHK-BUACTIONS: 9: offload, "device-openmp (powerpc64le-ibm-linux-gnu)" {8}, object
|
||||
// CHK-BUACTIONS: 10: input, "[[INPUT]]", c, (device-openmp)
|
||||
// CHK-BUACTIONS: 11: preprocessor, {10}, cpp-output, (device-openmp)
|
||||
// CHK-BUACTIONS: 12: compiler, {11}, ir, (device-openmp)
|
||||
// CHK-BUACTIONS: 13: offload, "host-openmp (powerpc64le-unknown-linux)" {2}, "device-openmp (x86_64-pc-linux-gnu)" {12}, ir
|
||||
// CHK-BUACTIONS: 14: backend, {13}, assembler, (device-openmp)
|
||||
// CHK-BUACTIONS: 15: assembler, {14}, object, (device-openmp)
|
||||
// CHK-BUACTIONS: 16: offload, "device-openmp (x86_64-pc-linux-gnu)" {15}, object
|
||||
// CHK-BUACTIONS: 17: backend, {2}, assembler, (host-openmp)
|
||||
// CHK-BUACTIONS: 18: assembler, {17}, object, (host-openmp)
|
||||
// CHK-BUACTIONS: 19: clang-offload-bundler, {9, 16, 18}, object, (host-openmp)
|
||||
|
||||
/// ###########################################################################
|
||||
|
||||
/// Check separate compilation with offloading - unbundling actions
|
||||
// RUN: touch %t.i
|
||||
// RUN: %clang -### -ccc-print-phases -fopenmp=libomp -fno-openmp-new-driver -o %t.out -lsomelib --target=powerpc64le-linux -fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu %t.i 2>&1 \
|
||||
// RUN: | FileCheck -check-prefix=CHK-UBACTIONS %s
|
||||
|
||||
// CHK-UBACTIONS: 0: input, "somelib", object, (host-openmp)
|
||||
// CHK-UBACTIONS: 1: input, "[[INPUT:.+\.i]]", cpp-output, (host-openmp)
|
||||
// CHK-UBACTIONS: 2: clang-offload-unbundler, {1}, cpp-output, (host-openmp)
|
||||
// CHK-UBACTIONS: 3: compiler, {2}, ir, (host-openmp)
|
||||
// CHK-UBACTIONS: 4: backend, {3}, assembler, (host-openmp)
|
||||
// CHK-UBACTIONS: 5: assembler, {4}, object, (host-openmp)
|
||||
// CHK-UBACTIONS: 6: input, "somelib", object, (device-openmp)
|
||||
// CHK-UBACTIONS: 7: compiler, {2}, ir, (device-openmp)
|
||||
// CHK-UBACTIONS: 8: offload, "host-openmp (powerpc64le-unknown-linux)" {3}, "device-openmp (powerpc64le-ibm-linux-gnu)" {7}, ir
|
||||
// CHK-UBACTIONS: 9: backend, {8}, assembler, (device-openmp)
|
||||
// CHK-UBACTIONS: 10: assembler, {9}, object, (device-openmp)
|
||||
// CHK-UBACTIONS: 11: linker, {6, 10}, image, (device-openmp)
|
||||
// CHK-UBACTIONS: 12: offload, "device-openmp (powerpc64le-ibm-linux-gnu)" {11}, image
|
||||
// CHK-UBACTIONS: 13: input, "somelib", object, (device-openmp)
|
||||
// CHK-UBACTIONS: 14: compiler, {2}, ir, (device-openmp)
|
||||
// CHK-UBACTIONS: 15: offload, "host-openmp (powerpc64le-unknown-linux)" {3}, "device-openmp (x86_64-pc-linux-gnu)" {14}, ir
|
||||
// CHK-UBACTIONS: 16: backend, {15}, assembler, (device-openmp)
|
||||
// CHK-UBACTIONS: 17: assembler, {16}, object, (device-openmp)
|
||||
// CHK-UBACTIONS: 18: linker, {13, 17}, image, (device-openmp)
|
||||
// CHK-UBACTIONS: 19: offload, "device-openmp (x86_64-pc-linux-gnu)" {18}, image
|
||||
// CHK-UBACTIONS: 20: clang-offload-wrapper, {12, 19}, ir, (host-openmp)
|
||||
// CHK-UBACTIONS: 21: backend, {20}, assembler, (host-openmp)
|
||||
// CHK-UBACTIONS: 22: assembler, {21}, object, (host-openmp)
|
||||
// CHK-UBACTIONS: 23: linker, {0, 5, 22}, image, (host-openmp)
|
||||
|
||||
/// ###########################################################################
|
||||
|
||||
/// Check separate compilation with offloading - unbundling/bundling actions
|
||||
// RUN: touch %t.i
|
||||
// RUN: %clang -### -ccc-print-phases -fopenmp=libomp -fno-openmp-new-driver -c -lsomelib --target=powerpc64le-linux -fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu %t.i 2>&1 \
|
||||
// RUN: | FileCheck -check-prefix=CHK-UBUACTIONS %s
|
||||
|
||||
// CHK-UBUACTIONS: 0: input, "[[INPUT:.+\.i]]", cpp-output, (host-openmp)
|
||||
// CHK-UBUACTIONS: 1: clang-offload-unbundler, {0}, cpp-output, (host-openmp)
|
||||
// CHK-UBUACTIONS: 2: compiler, {1}, ir, (host-openmp)
|
||||
// CHK-UBUACTIONS: 3: compiler, {1}, ir, (device-openmp)
|
||||
// CHK-UBUACTIONS: 4: offload, "host-openmp (powerpc64le-unknown-linux)" {2}, "device-openmp (powerpc64le-ibm-linux-gnu)" {3}, ir
|
||||
// CHK-UBUACTIONS: 5: backend, {4}, assembler, (device-openmp)
|
||||
// CHK-UBUACTIONS: 6: assembler, {5}, object, (device-openmp)
|
||||
// CHK-UBUACTIONS: 7: offload, "device-openmp (powerpc64le-ibm-linux-gnu)" {6}, object
|
||||
// CHK-UBUACTIONS: 8: compiler, {1}, ir, (device-openmp)
|
||||
// CHK-UBUACTIONS: 9: offload, "host-openmp (powerpc64le-unknown-linux)" {2}, "device-openmp (x86_64-pc-linux-gnu)" {8}, ir
|
||||
// CHK-UBUACTIONS: 10: backend, {9}, assembler, (device-openmp)
|
||||
// CHK-UBUACTIONS: 11: assembler, {10}, object, (device-openmp)
|
||||
// CHK-UBUACTIONS: 12: offload, "device-openmp (x86_64-pc-linux-gnu)" {11}, object
|
||||
// CHK-UBUACTIONS: 13: backend, {2}, assembler, (host-openmp)
|
||||
// CHK-UBUACTIONS: 14: assembler, {13}, object, (host-openmp)
|
||||
// CHK-UBUACTIONS: 15: clang-offload-bundler, {7, 12, 14}, object, (host-openmp)
|
||||
|
||||
/// ###########################################################################
|
||||
|
||||
/// Check separate compilation with offloading - bundling jobs construct
|
||||
// RUN: %clang -### -fopenmp=libomp -fno-openmp-new-driver -c -lsomelib --target=powerpc64le-linux -fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu %s 2>&1 \
|
||||
// RUN: | FileCheck -check-prefix=CHK-BUJOBS %s
|
||||
// RUN: %clang -### -fopenmp=libomp -fno-openmp-new-driver -c -lsomelib --target=powerpc64le-linux -fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu %s -save-temps 2>&1 \
|
||||
// RUN: | FileCheck -check-prefix=CHK-BUJOBS-ST %s
|
||||
|
||||
// Create host BC.
|
||||
// CHK-BUJOBS: "-cc1" "-triple" "powerpc64le-unknown-linux" {{.*}}"-emit-llvm-bc" {{.*}}"-fopenmp" {{.*}}"-disable-llvm-passes" {{.*}}"-fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu" {{.*}}"-o" "
|
||||
// CHK-BUJOBS-SAME: [[HOSTBC:[^\\/]+\.bc]]" "-x" "c" "
|
||||
// CHK-BUJOBS-SAME: [[INPUT:[^\\/]+\.c]]"
|
||||
|
||||
// CHK-BUJOBS-ST: "-cc1" "-triple" "powerpc64le-unknown-linux" {{.*}}"-E" {{.*}}"-fopenmp" {{.*}}"-o" "
|
||||
// CHK-BUJOBS-ST-SAME: [[HOSTPP:[^\\/]+\.i]]" "-x" "c" "
|
||||
// CHK-BUJOBS-ST-SAME: [[INPUT:[^\\/]+\.c]]"
|
||||
// CHK-BUJOBS-ST: "-cc1" "-triple" "powerpc64le-unknown-linux" {{.*}}"-emit-llvm-bc" {{.*}}"-fopenmp" {{.*}}"-disable-llvm-passes" {{.*}}"-fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu" {{.*}}"-o" "
|
||||
// CHK-BUJOBS-ST-SAME: [[HOSTBC:[^\\/]+\.bc]]" "-x" "cpp-output" "{{.*}}[[HOSTPP]]"
|
||||
|
||||
// Create target 1 object.
|
||||
// CHK-BUJOBS: "-cc1" "-triple" "powerpc64le-ibm-linux-gnu" "-aux-triple" "powerpc64le-unknown-linux" {{.*}}"-emit-obj" {{.*}}"-fopenmp" {{.*}}"-fopenmp-is-device" "-fopenmp-host-ir-file-path" "{{.*}}[[HOSTBC]]" {{.*}}"-o" "
|
||||
// CHK-BUJOBS-SAME: [[T1OBJ:[^\\/]+\.o]]" "-x" "c" "{{.*}}[[INPUT]]"
|
||||
// CHK-BUJOBS-ST: "-cc1" "-triple" "powerpc64le-ibm-linux-gnu" "-aux-triple" "powerpc64le-unknown-linux" {{.*}}"-E" {{.*}}"-fopenmp" {{.*}}"-o" "
|
||||
// CHK-BUJOBS-ST-SAME: [[T1PP:[^\\/]+\.i]]" "-x" "c" "{{.*}}[[INPUT]]"
|
||||
// CHK-BUJOBS-ST: "-cc1" "-triple" "powerpc64le-ibm-linux-gnu" "-aux-triple" "powerpc64le-unknown-linux" {{.*}}"-emit-llvm-bc" {{.*}}"-fopenmp" {{.*}}"-fopenmp-is-device" "-fopenmp-host-ir-file-path" "{{.*}}[[HOSTBC]]" {{.*}}"-o" "
|
||||
// CHK-BUJOBS-ST-SAME: [[T1BC:[^\\/]+\.bc]]" "-x" "cpp-output" "{{.*}}[[T1PP]]"
|
||||
// CHK-BUJOBS-ST: "-cc1" "-triple" "powerpc64le-ibm-linux-gnu" "-aux-triple" "powerpc64le-unknown-linux" {{.*}}"-S" {{.*}}"-fopenmp" {{.*}}"-o" "
|
||||
// CHK-BUJOBS-ST-SAME: [[T1ASM:[^\\/]+\.s]]" "-x" "ir" "{{.*}}[[T1BC]]"
|
||||
// CHK-BUJOBS-ST: "-cc1as" "-triple" "powerpc64le-ibm-linux-gnu" "-filetype" "obj" {{.*}}"-o" "
|
||||
// CHK-BUJOBS-ST-SAME: [[T1OBJ:[^\\/]+\.o]]" "{{.*}}[[T1ASM]]"
|
||||
|
||||
// Create target 2 object.
|
||||
// CHK-BUJOBS: "-cc1" "-triple" "x86_64-pc-linux-gnu" "-aux-triple" "powerpc64le-unknown-linux" {{.*}}"-emit-obj" {{.*}}"-fopenmp" {{.*}}"-fopenmp-is-device" "-fopenmp-host-ir-file-path" "{{.*}}[[HOSTBC]]" {{.*}}"-o" "
|
||||
// CHK-BUJOBS-SAME: [[T2OBJ:[^\\/]+\.o]]" "-x" "c" "{{.*}}[[INPUT]]"
|
||||
// CHK-BUJOBS-ST: "-cc1" "-triple" "x86_64-pc-linux-gnu" "-aux-triple" "powerpc64le-unknown-linux" {{.*}}"-E" {{.*}}"-fopenmp" {{.*}}"-o" "
|
||||
// CHK-BUJOBS-ST-SAME: [[T2PP:[^\\/]+\.i]]" "-x" "c" "{{.*}}[[INPUT]]"
|
||||
// CHK-BUJOBS-ST: "-cc1" "-triple" "x86_64-pc-linux-gnu" "-aux-triple" "powerpc64le-unknown-linux" {{.*}}"-emit-llvm-bc" {{.*}}"-fopenmp" {{.*}}"-fopenmp-is-device" "-fopenmp-host-ir-file-path" "{{.*}}[[HOSTBC]]" {{.*}}"-o" "
|
||||
// CHK-BUJOBS-ST-SAME: [[T2BC:[^\\/]+\.bc]]" "-x" "cpp-output" "{{.*}}[[T2PP]]"
|
||||
// CHK-BUJOBS-ST: "-cc1" "-triple" "x86_64-pc-linux-gnu" "-aux-triple" "powerpc64le-unknown-linux" {{.*}}"-S" {{.*}}"-fopenmp" {{.*}}"-o" "
|
||||
// CHK-BUJOBS-ST-SAME: [[T2ASM:[^\\/]+\.s]]" "-x" "ir" "{{.*}}[[T2BC]]"
|
||||
// CHK-BUJOBS-ST: "-cc1as" "-triple" "x86_64-pc-linux-gnu" "-filetype" "obj" {{.*}}"-o" "
|
||||
// CHK-BUJOBS-ST-SAME: [[T2OBJ:[^\\/]+\.o]]" "{{.*}}[[T2ASM]]"
|
||||
|
||||
// Create host object and bundle.
|
||||
// CHK-BUJOBS: "-cc1" "-triple" "powerpc64le-unknown-linux" {{.*}}"-emit-obj" {{.*}}"-fopenmp" {{.*}}"-o" "
|
||||
// CHK-BUJOBS-SAME: [[HOSTOBJ:[^\\/]+\.o]]" "-x" "ir" "{{.*}}[[HOSTBC]]"
|
||||
// CHK-BUJOBS: clang-offload-bundler{{.*}}" "-type=o" "-targets=openmp-powerpc64le-ibm-linux-gnu,openmp-x86_64-pc-linux-gnu,host-powerpc64le-unknown-linux" "-output=
|
||||
// CHK-BUJOBS-SAME: [[RES:[^\\/]+\.o]]" "-input={{.*}}[[T1OBJ]]" "-input={{.*}}[[T2OBJ]]" "-input={{.*}}[[HOSTOBJ]]"
|
||||
// CHK-BUJOBS-ST: "-cc1" "-triple" "powerpc64le-unknown-linux" {{.*}}"-S" {{.*}}"-fopenmp" {{.*}}"-o" "
|
||||
// CHK-BUJOBS-ST-SAME: [[HOSTASM:[^\\/]+\.s]]" "-x" "ir" "{{.*}}[[HOSTBC]]"
|
||||
// CHK-BUJOBS-ST: "-cc1as" "-triple" "powerpc64le-unknown-linux" "-filetype" "obj" {{.*}}"-o" "
|
||||
// CHK-BUJOBS-ST-SAME: [[HOSTOBJ:[^\\/]+\.o]]" "{{.*}}[[HOSTASM]]"
|
||||
// CHK-BUJOBS-ST: clang-offload-bundler{{.*}}" "-type=o" "-targets=openmp-powerpc64le-ibm-linux-gnu,openmp-x86_64-pc-linux-gnu,host-powerpc64le-unknown-linux" "-output=
|
||||
// CHK-BUJOBS-ST-SAME: [[RES:[^\\/]+\.o]]" "-input={{.*}}[[T1OBJ]]" "-input={{.*}}[[T2OBJ]]" "-input={{.*}}[[HOSTOBJ]]"
|
||||
|
||||
/// ###########################################################################
|
||||
|
||||
/// Check separate compilation with offloading - unbundling jobs construct
|
||||
// RUN: touch %t.i
|
||||
// RUN: %clang -### -fopenmp=libomp -fno-openmp-new-driver -o %t.out -lsomelib --target=powerpc64le-linux -fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu %t.i 2>&1 \
|
||||
// RUN: | FileCheck -check-prefix=CHK-UBJOBS %s
|
||||
// RUN: %clang -### -fopenmp=libomp -fno-openmp-new-driver -o %t.out -lsomelib --target=powerpc64le-linux -fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu %t.i -save-temps 2>&1 \
|
||||
// RUN: | FileCheck -check-prefix=CHK-UBJOBS-ST %s
|
||||
// RUN: touch %t.o
|
||||
// RUN: %clang -### -fopenmp=libomp -fno-openmp-new-driver -o %t.out -lsomelib --target=powerpc64le-linux -fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu %t.o 2>&1 \
|
||||
// RUN: | FileCheck -check-prefix=CHK-UBJOBS2 %s
|
||||
// RUN: %clang -### -fopenmp=libomp -fno-openmp-new-driver -o %t.out -lsomelib --target=powerpc64le-linux -fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu %t.o %S/Inputs/in.so -save-temps 2>&1 \
|
||||
// RUN: | FileCheck -check-prefix=CHK-UBJOBS2-ST %s
|
||||
|
||||
// Unbundle and create host BC.
|
||||
// CHK-UBJOBS: clang-offload-bundler{{.*}}" "-type=i" "-targets=host-powerpc64le-unknown-linux,openmp-powerpc64le-ibm-linux-gnu,openmp-x86_64-pc-linux-gnu" "-input=
|
||||
// CHK-UBJOBS-SAME: [[INPUT:[^\\/]+\.tmp\.i]]" "-output=
|
||||
// CHK-UBJOBS-SAME: [[HOSTPP:[^\\/]+\.i]]" "-output=
|
||||
// CHK-UBJOBS-SAME: [[T1PP:[^\\/]+\.i]]" "-output=
|
||||
// CHK-UBJOBS-SAME: [[T2PP:[^\\/]+\.i]]" "-unbundle" "-allow-missing-bundles"
|
||||
// CHK-UBJOBS: "-cc1" "-triple" "powerpc64le-unknown-linux" {{.*}}"-emit-llvm-bc" {{.*}}"-fopenmp" {{.*}}"-disable-llvm-passes" {{.*}}"-fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu" {{.*}}"-o" "
|
||||
// CHK-UBJOBS-SAME: [[HOSTBC:[^\\/]+\.bc]]" "-x" "cpp-output" "{{.*}}[[HOSTPP]]"
|
||||
// CHK-UBJOBS: "-cc1" "-triple" "powerpc64le-unknown-linux" {{.*}}"-emit-obj" {{.*}}"-fopenmp" {{.*}}"-o" "
|
||||
// CHK-UBJOBS-SAME: [[HOSTOBJ:[^\\/]+\.o]]" "-x" "ir" "{{.*}}[[HOSTBC]]"
|
||||
// CHK-UBJOBS-ST: clang-offload-bundler{{.*}}" "-type=i" "-targets=host-powerpc64le-unknown-linux,openmp-powerpc64le-ibm-linux-gnu,openmp-x86_64-pc-linux-gnu" "-input=
|
||||
// CHK-UBJOBS-ST-SAME: [[INPUT:[^\\/]+.tmp\.i]]" "-output=
|
||||
// CHK-UBJOBS-ST-SAME: [[HOSTPP:[^\\/]+linux\.i]]" "-output=
|
||||
// CHK-UBJOBS-ST-SAME: [[T1PP:[^\\/]+gnu\.i]]" "-output=
|
||||
// CHK-UBJOBS-ST-SAME: [[T2PP:[^\\/]+gnu\.i]]" "-unbundle" "-allow-missing-bundles"
|
||||
// CHK-UBJOBS-ST: "-cc1" "-triple" "powerpc64le-unknown-linux" {{.*}}"-emit-llvm-bc" {{.*}}"-fopenmp" {{.*}}"-disable-llvm-passes" {{.*}}"-fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu" {{.*}}"-o" "
|
||||
// CHK-UBJOBS-ST-SAME: [[HOSTBC:[^\\/]+\.bc]]" "-x" "cpp-output" "{{.*}}[[HOSTPP]]"
|
||||
// CHK-UBJOBS-ST: "-cc1" "-triple" "powerpc64le-unknown-linux" {{.*}}"-S" {{.*}}"-fopenmp" {{.*}}"-o" "
|
||||
// CHK-UBJOBS-ST-SAME: [[HOSTASM:[^\\/]+\.s]]" "-x" "ir" "{{.*}}[[HOSTBC]]"
|
||||
// CHK-UBJOBS-ST: "-cc1as" "-triple" "powerpc64le-unknown-linux" "-filetype" "obj" {{.*}}"-o" "
|
||||
// CHK-UBJOBS-ST-SAME: [[HOSTOBJ:[^\\/]+\.o]]" "{{.*}}[[HOSTASM]]"
|
||||
|
||||
// Create target 1 object.
|
||||
// CHK-UBJOBS: "-cc1" "-triple" "powerpc64le-ibm-linux-gnu" "-aux-triple" "powerpc64le-unknown-linux" {{.*}}"-emit-obj" {{.*}}"-fopenmp" {{.*}}"-fopenmp-is-device" "-fopenmp-host-ir-file-path" "{{.*}}[[HOSTBC]]" {{.*}}"-o" "
|
||||
// CHK-UBJOBS-SAME: [[T1OBJ:[^\\/]+\.o]]" "-x" "cpp-output" "{{.*}}[[T1PP]]"
|
||||
// CHK-UBJOBS: ld{{(\.exe)?}}" {{.*}}"-o" "
|
||||
// CHK-UBJOBS-SAME: [[T1BIN:[^\\/]+\.out]]" {{.*}}"{{.*}}[[T1OBJ]]"
|
||||
// CHK-UBJOBS-ST: "-cc1" "-triple" "powerpc64le-ibm-linux-gnu" "-aux-triple" "powerpc64le-unknown-linux" {{.*}}"-emit-llvm-bc" {{.*}}"-fopenmp" {{.*}}"-fopenmp-is-device" "-fopenmp-host-ir-file-path" "{{.*}}[[HOSTBC]]" {{.*}}"-o" "
|
||||
// CHK-UBJOBS-ST-SAME: [[T1BC:[^\\/]+\.bc]]" "-x" "cpp-output" "{{.*}}[[T1PP]]"
|
||||
// CHK-UBJOBS-ST: "-cc1" "-triple" "powerpc64le-ibm-linux-gnu" "-aux-triple" "powerpc64le-unknown-linux" {{.*}}"-S" {{.*}}"-fopenmp" {{.*}}"-o" "
|
||||
// CHK-UBJOBS-ST-SAME: [[T1ASM:[^\\/]+\.s]]" "-x" "ir" "{{.*}}[[T1BC]]"
|
||||
// CHK-UBJOBS-ST: "-cc1as" "-triple" "powerpc64le-ibm-linux-gnu" "-filetype" "obj" {{.*}}"-o" "
|
||||
// CHK-UBJOBS-ST-SAME: [[T1OBJ:[^\\/]+\.o]]" "{{.*}}[[T1ASM]]"
|
||||
// CHK-UBJOBS-ST: ld{{(\.exe)?}}" {{.*}}"-o" "
|
||||
// CHK-UBJOBS-ST-SAME: [[T1BIN:[^\\/]+\.out-openmp-powerpc64le-ibm-linux-gnu]]" {{.*}}"{{.*}}[[T1OBJ]]"
|
||||
|
||||
// Create target 2 object.
|
||||
// CHK-UBJOBS: "-cc1" "-triple" "x86_64-pc-linux-gnu" "-aux-triple" "powerpc64le-unknown-linux" {{.*}}"-emit-obj" {{.*}}"-fopenmp" {{.*}}"-fopenmp-is-device" "-fopenmp-host-ir-file-path" "{{.*}}[[HOSTBC]]" {{.*}}"-o" "
|
||||
// CHK-UBJOBS-SAME: [[T2OBJ:[^\\/]+\.o]]" "-x" "cpp-output" "{{.*}}[[T2PP]]"
|
||||
// CHK-UBJOBS: ld{{(\.exe)?}}" {{.*}}"-o" "
|
||||
// CHK-UBJOBS-SAME: [[T2BIN:[^\\/]+\.out]]" {{.*}}"{{.*}}[[T2OBJ]]"
|
||||
// CHK-UBJOBS-ST: "-cc1" "-triple" "x86_64-pc-linux-gnu" "-aux-triple" "powerpc64le-unknown-linux" {{.*}}"-emit-llvm-bc" {{.*}}"-fopenmp" {{.*}}"-fopenmp-is-device" "-fopenmp-host-ir-file-path" "{{.*}}[[HOSTBC]]" {{.*}}"-o" "
|
||||
// CHK-UBJOBS-ST-SAME: [[T2BC:[^\\/]+\.bc]]" "-x" "cpp-output" "{{.*}}[[T2PP]]"
|
||||
// CHK-UBJOBS-ST: "-cc1" "-triple" "x86_64-pc-linux-gnu" "-aux-triple" "powerpc64le-unknown-linux" {{.*}}"-S" {{.*}}"-fopenmp" {{.*}}"-o" "
|
||||
// CHK-UBJOBS-ST-SAME: [[T2ASM:[^\\/]+\.s]]" "-x" "ir" "{{.*}}[[T2BC]]"
|
||||
// CHK-UBJOBS-ST: "-cc1as" "-triple" "x86_64-pc-linux-gnu" "-filetype" "obj" {{.*}}"-o" "
|
||||
// CHK-UBJOBS-ST-SAME: [[T2OBJ:[^\\/]+\.o]]" "{{.*}}[[T2ASM]]"
|
||||
// CHK-UBJOBS-ST: ld{{(\.exe)?}}" {{.*}}"-o" "
|
||||
// CHK-UBJOBS-ST-SAME: [[T2BIN:[^\\/]+\.out-openmp-x86_64-pc-linux-gnu]]" {{.*}}"{{.*}}[[T2OBJ]]"
|
||||
|
||||
// Create wrapper BC file and wrapper object.
|
||||
// CHK-UBJOBS: clang-offload-wrapper{{(\.exe)?}}" "-target" "powerpc64le-unknown-linux" {{.*}}"-o" "
|
||||
// CHK-UBJOBS-SAME: [[WRAPPERBC:[^\\/]+\.bc]]" "{{.*}}[[T1BIN]]" "{{.*}}[[T2BIN]]"
|
||||
// CHK-UBJOBS: "-cc1" "-triple" "powerpc64le-unknown-linux" {{.*}}"-emit-obj" {{.*}}"-fopenmp" {{.*}}"-o" "
|
||||
// CHK-UBJOBS-SAME: [[WRAPPEROBJ:[^\\/]+\.o]]" "-x" "ir" "{{.*}}[[WRAPPERBC]]"
|
||||
// CHK-UBJOBS-ST: clang-offload-wrapper{{(\.exe)?}}" "-target" "powerpc64le-unknown-linux" {{.*}}"-o" "
|
||||
// CHK-UBJOBS-ST-SAME: [[WRAPPERBC:[^\\/]+\.bc]]" "{{.*}}[[T1BIN]]" "{{.*}}[[T2BIN]]"
|
||||
// CHK-UBJOBS-ST: "-cc1" "-triple" "powerpc64le-unknown-linux" {{.*}}"-S" {{.*}}"-fopenmp" {{.*}}"-o" "
|
||||
// CHK-UBJOBS-ST-SAME: [[WRAPPERASM:[^\\/]+\.s]]" "-x" "ir" "{{.*}}[[WRAPPERBC]]"
|
||||
// CHK-UBJOBS-ST: "-cc1as" "-triple" "powerpc64le-unknown-linux" "-filetype" "obj" {{.*}}"-o" "
|
||||
// CHK-UBJOBS-ST-SAME: [[WRAPPEROBJ:[^\\/]+\.o]]" "{{.*}}[[WRAPPERASM]]"
|
||||
|
||||
// Create binary.
|
||||
// CHK-UBJOBS: ld{{(\.exe)?}}" {{.*}}"-o" "
|
||||
// CHK-UBJOBS-SAME: [[HOSTBIN:[^\\/]+\.out]]" {{.*}}"{{.*}}[[HOSTOBJ]]" "{{.*}}[[WRAPPEROBJ]]"
|
||||
// CHK-UBJOBS-ST: ld{{(\.exe)?}}" {{.*}}"-o" "
|
||||
// CHK-UBJOBS-ST-SAME: [[HOSTBIN:[^\\/]+\.out]]" {{.*}}"{{.*}}[[HOSTOBJ]]" "{{.*}}[[WRAPPEROBJ]]"
|
||||
|
||||
// Unbundle object file.
|
||||
// CHK-UBJOBS2: clang-offload-bundler{{.*}}" "-type=o" "-targets=host-powerpc64le-unknown-linux,openmp-powerpc64le-ibm-linux-gnu,openmp-x86_64-pc-linux-gnu" "-input=
|
||||
// CHK-UBJOBS2-SAME: [[INPUT:[^\\/]+tmp\.o]]" "-output=
|
||||
// CHK-UBJOBS2-SAME: [[HOSTOBJ:[^\\/]+\.o]]" "-output=
|
||||
// CHK-UBJOBS2-SAME: [[T1OBJ:[^\\/]+\.o]]" "-output=
|
||||
// CHK-UBJOBS2-SAME: [[T2OBJ:[^\\/]+\.o]]" "-unbundle" "-allow-missing-bundles"
|
||||
// CHK-UBJOBS2: ld{{(\.exe)?}}" {{.*}}"-o" "
|
||||
// CHK-UBJOBS2-SAME: [[T1BIN:[^\\/]+\.out]]" {{.*}}"{{.*}}[[T1OBJ]]"
|
||||
// CHK-UBJOBS2: ld{{(\.exe)?}}" {{.*}}"-o" "
|
||||
// CHK-UBJOBS2-SAME: [[T2BIN:[^\\/]+\.out]]" {{.*}}"{{.*}}[[T2OBJ]]"
|
||||
// CHK-UBJOBS2: clang-offload-wrapper{{(\.exe)?}}" "-target" "powerpc64le-unknown-linux" {{.*}}"-o" "
|
||||
// CHK-UBJOBS2-SAME: [[WRAPPERBC:[^\\/]+\.bc]]" "{{.*}}[[T1BIN]]" "{{.*}}[[T2BIN]]"
|
||||
// CHK-UBJOBS2: "-cc1" "-triple" "powerpc64le-unknown-linux" {{.*}}"-emit-obj" {{.*}}"-fopenmp" {{.*}}"-o" "
|
||||
// CHK-UBJOBS2-SAME: [[WRAPPEROBJ:[^\\/]+\.o]]" "-x" "ir" "{{.*}}[[WRAPPERBC]]"
|
||||
// CHK-UBJOBS2: ld{{(\.exe)?}}" {{.*}}"-o" "
|
||||
// CHK-UBJOBS2-SAME: [[HOSTBIN:[^\\/]+\.out]]" {{.*}}"{{.*}}[[HOSTOBJ]]" "{{.*}}[[WRAPPEROBJ]]"
|
||||
// CHK-UBJOBS2-ST-NOT: clang-offload-bundler{{.*}}in.so
|
||||
// CHK-UBJOBS2-ST: clang-offload-bundler{{.*}}" "-type=o" "-targets=host-powerpc64le-unknown-linux,openmp-powerpc64le-ibm-linux-gnu,openmp-x86_64-pc-linux-gnu" "-input=
|
||||
// CHK-UBJOBS2-ST-SAME: [[INPUT:[^\\/]+tmp\.o]]" "-output=
|
||||
// CHK-UBJOBS2-ST-SAME: [[HOSTOBJ:[^\\/]+linux\.o]]" "-output=
|
||||
// CHK-UBJOBS2-ST-SAME: [[T1OBJ:[^\\/]+gnu\.o]]" "-output=
|
||||
// CHK-UBJOBS2-ST-SAME: [[T2OBJ:[^\\/]+gnu\.o]]" "-unbundle" "-allow-missing-bundles"
|
||||
// CHK-UBJOBS2-ST-NOT: clang-offload-bundler{{.*}}in.so
|
||||
// CHK-UBJOBS2-ST: ld{{(\.exe)?}}" {{.*}}"-o" "
|
||||
// CHK-UBJOBS2-ST-SAME: [[T1BIN:[^\\/]+\.out-openmp-powerpc64le-ibm-linux-gnu]]" {{.*}}"{{.*}}[[T1OBJ]]"
|
||||
// CHK-UBJOBS2-ST: ld{{(\.exe)?}}" {{.*}}"-o" "
|
||||
// CHK-UBJOBS2-ST-SAME: [[T2BIN:[^\\/]+\.out-openmp-x86_64-pc-linux-gnu]]" {{.*}}"{{.*}}[[T2OBJ]]"
|
||||
// CHK-UBJOBS2-ST: clang-offload-wrapper{{(\.exe)?}}" "-target" "powerpc64le-unknown-linux" {{.*}}"-o" "
|
||||
// CHK-UBJOBS2-ST-SAME: [[WRAPPERBC:[^\\/]+\.bc]]" "{{.*}}[[T1BIN]]" "{{.*}}[[T2BIN]]"
|
||||
// CHK-UBJOBS2-ST: "-cc1" "-triple" "powerpc64le-unknown-linux" {{.*}}"-S" {{.*}}"-fopenmp" {{.*}}"-o" "
|
||||
// CHK-UBJOBS2-ST-SAME: [[WRAPPERASM:[^\\/]+\.s]]" "-x" "ir" "{{.*}}[[WRAPPERBC]]"
|
||||
// CHK-UBJOBS2-ST: "-cc1as" "-triple" "powerpc64le-unknown-linux" "-filetype" "obj" {{.*}}"-o" "
|
||||
// CHK-UBJOBS2-ST-SAME: [[WRAPPEROBJ:[^\\/]+\.o]]" "{{.*}}[[WRAPPERASM]]"
|
||||
// CHK-UBJOBS2-ST: ld{{(\.exe)?}}" {{.*}}"-o" "
|
||||
// CHK-UBJOBS2-ST-SAME: [[HOSTBIN:[^\\/]+\.out]]" {{.*}}"{{.*}}[[HOSTOBJ]]" "{{.*}}[[WRAPPEROBJ]]"
|
||||
|
||||
/// ###########################################################################
|
||||
|
||||
/// Check separate compilation with offloading - unbundling/bundling jobs
|
||||
/// construct
|
||||
// RUN: touch %t.i
|
||||
// RUN: %clang -### -fopenmp=libomp -fno-openmp-new-driver -c %t.o -lsomelib --target=powerpc64le-linux -fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu %t.i 2>&1 \
|
||||
// RUN: | FileCheck -check-prefix=CHK-UBUJOBS %s
|
||||
// RUN: %clang -### -fopenmp=libomp -fno-openmp-new-driver -c %t.o -lsomelib --target=powerpc64le-linux -fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu %t.i -save-temps 2>&1 \
|
||||
// RUN: | FileCheck -check-prefix=CHK-UBUJOBS-ST %s
|
||||
|
||||
// Unbundle and create host BC.
|
||||
// CHK-UBUJOBS: clang-offload-bundler{{.*}}" "-type=i" "-targets=host-powerpc64le-unknown-linux,openmp-powerpc64le-ibm-linux-gnu,openmp-x86_64-pc-linux-gnu" "-input=
|
||||
// CHK-UBUJOBS-SAME: [[INPUT:[^\\/]+\.i]]" "-output=
|
||||
// CHK-UBUJOBS-SAME: [[HOSTPP:[^\\/]+\.i]]" "-output=
|
||||
// CHK-UBUJOBS-SAME: [[T1PP:[^\\/]+\.i]]" "-output=
|
||||
// CHK-UBUJOBS-SAME: [[T2PP:[^\\/]+\.i]]" "-unbundle" "-allow-missing-bundles"
|
||||
// CHK-UBUJOBS: "-cc1" "-triple" "powerpc64le-unknown-linux" {{.*}}"-emit-llvm-bc" {{.*}}"-fopenmp" {{.*}}"-disable-llvm-passes" {{.*}}"-fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu" {{.*}}"-o" "
|
||||
// CHK-UBUJOBS-SAME: [[HOSTBC:[^\\/]+\.bc]]" "-x" "cpp-output" "{{.*}}[[HOSTPP]]"
|
||||
|
||||
// CHK-UBUJOBS-ST: clang-offload-bundler{{.*}}" "-type=i" "-targets=host-powerpc64le-unknown-linux,openmp-powerpc64le-ibm-linux-gnu,openmp-x86_64-pc-linux-gnu" "-input=
|
||||
// CHK-UBUJOBS-ST-SAME: [[INPUT:[^\\/]+tmp\.i]]" "-output=
|
||||
// CHK-UBUJOBS-ST-SAME: [[HOSTPP:[^\\/]+linux\.i]]" "-output=
|
||||
// CHK-UBUJOBS-ST-SAME: [[T1PP:[^\\/]+gnu\.i]]" "-output=
|
||||
// CHK-UBUJOBS-ST-SAME: [[T2PP:[^\\/]+gnu\.i]]" "-unbundle" "-allow-missing-bundles"
|
||||
// CHK-UBUJOBS-ST: "-cc1" "-triple" "powerpc64le-unknown-linux" {{.*}}"-emit-llvm-bc" {{.*}}"-fopenmp" {{.*}}"-disable-llvm-passes" {{.*}}"-fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu" {{.*}}"-o" "
|
||||
// CHK-UBUJOBS-ST-SAME: [[HOSTBC:[^\\/]+\.bc]]" "-x" "cpp-output" "{{.*}}[[HOSTPP]]"
|
||||
|
||||
// Create target 1 object.
|
||||
// CHK-UBUJOBS: "-cc1" "-triple" "powerpc64le-ibm-linux-gnu" "-aux-triple" "powerpc64le-unknown-linux" {{.*}}"-emit-obj" {{.*}}"-fopenmp" {{.*}}"-fopenmp-is-device" "-fopenmp-host-ir-file-path" "{{.*}}[[HOSTBC]]" {{.*}}"-o" "
|
||||
// CHK-UBUJOBS-SAME: [[T1OBJ:[^\\/]+\.o]]" "-x" "cpp-output" "{{.*}}[[T1PP]]"
|
||||
// CHK-UBUJOBS-ST: "-cc1" "-triple" "powerpc64le-ibm-linux-gnu" "-aux-triple" "powerpc64le-unknown-linux" {{.*}}"-emit-llvm-bc" {{.*}}"-fopenmp" {{.*}}"-fopenmp-is-device" "-fopenmp-host-ir-file-path" "{{.*}}[[HOSTBC]]" {{.*}}"-o" "
|
||||
// CHK-UBUJOBS-ST-SAME: [[T1BC:[^\\/]+\.bc]]" "-x" "cpp-output" "{{.*}}[[T1PP]]"
|
||||
// CHK-UBUJOBS-ST: "-cc1" "-triple" "powerpc64le-ibm-linux-gnu" "-aux-triple" "powerpc64le-unknown-linux" {{.*}}"-S" {{.*}}"-fopenmp" {{.*}}"-o" "
|
||||
// CHK-UBUJOBS-ST-SAME: [[T1ASM:[^\\/]+\.s]]" "-x" "ir" "{{.*}}[[T1BC]]"
|
||||
// CHK-UBUJOBS-ST: "-cc1as" "-triple" "powerpc64le-ibm-linux-gnu" "-filetype" "obj" {{.*}}"-o" "
|
||||
// CHK-UBUJOBS-ST-SAME: [[T1OBJ:[^\\/]+\.o]]" "{{.*}}[[T1ASM]]"
|
||||
|
||||
// Create target 2 object.
|
||||
// CHK-UBUJOBS: "-cc1" "-triple" "x86_64-pc-linux-gnu" "-aux-triple" "powerpc64le-unknown-linux" {{.*}}"-emit-obj" {{.*}}"-fopenmp" {{.*}}"-fopenmp-is-device" "-fopenmp-host-ir-file-path" "{{.*}}[[HOSTBC]]" {{.*}}"-o" "
|
||||
// CHK-UBUJOBS-SAME: [[T2OBJ:[^\\/]+\.o]]" "-x" "cpp-output" "{{.*}}[[T2PP]]"
|
||||
// CHK-UBUJOBS-ST: "-cc1" "-triple" "x86_64-pc-linux-gnu" "-aux-triple" "powerpc64le-unknown-linux" {{.*}}"-emit-llvm-bc" {{.*}}"-fopenmp" {{.*}}"-fopenmp-is-device" "-fopenmp-host-ir-file-path" "{{.*}}[[HOSTBC]]" {{.*}}"-o" "
|
||||
// CHK-UBUJOBS-ST-SAME: [[T2BC:[^\\/]+\.bc]]" "-x" "cpp-output" "{{.*}}[[T2PP]]"
|
||||
// CHK-UBUJOBS-ST: "-cc1" "-triple" "x86_64-pc-linux-gnu" "-aux-triple" "powerpc64le-unknown-linux" {{.*}}"-S" {{.*}}"-fopenmp" {{.*}}"-o" "
|
||||
// CHK-UBUJOBS-ST-SAME: [[T2ASM:[^\\/]+\.s]]" "-x" "ir" "{{.*}}[[T2BC]]"
|
||||
// CHK-UBUJOBS-ST: "-cc1as" "-triple" "x86_64-pc-linux-gnu" "-filetype" "obj" {{.*}}"-o" "
|
||||
// CHK-UBUJOBS-ST-SAME: [[T2OBJ:[^\\/]+\.o]]" "{{.*}}[[T2ASM]]"
|
||||
|
||||
// Create binary.
|
||||
// CHK-UBUJOBS: "-cc1" "-triple" "powerpc64le-unknown-linux" {{.*}}"-emit-obj" {{.*}}"-fopenmp" {{.*}}"-o" "
|
||||
// CHK-UBUJOBS-SAME: [[HOSTOBJ:[^\\/]+\.o]]" "-x" "ir" "{{.*}}[[HOSTBC]]"
|
||||
// CHK-UBUJOBS: clang-offload-bundler{{.*}}" "-type=o" "-targets=openmp-powerpc64le-ibm-linux-gnu,openmp-x86_64-pc-linux-gnu,host-powerpc64le-unknown-linux" "-output=
|
||||
// CHK-UBUJOBS-SAME: [[RES:[^\\/]+\.o]]" "-input={{.*}}[[T1OBJ]]" "-input={{.*}}[[T2OBJ]]" "-input={{.*}}[[HOSTOBJ]]"
|
||||
// CHK-UBUJOBS-ST: "-cc1" "-triple" "powerpc64le-unknown-linux" {{.*}}"-S" {{.*}}"-fopenmp" {{.*}}"-o" "
|
||||
// CHK-UBUJOBS-ST-SAME: [[HOSTASM:[^\\/]+\.s]]" "-x" "ir" "{{.*}}[[HOSTBC]]"
|
||||
// CHK-UBUJOBS-ST: "-cc1as" "-triple" "powerpc64le-unknown-linux" "-filetype" "obj" {{.*}}"-o" "
|
||||
// CHK-UBUJOBS-ST-SAME: [[HOSTOBJ:[^\\/]+\.o]]" "{{.*}}[[HOSTASM]]"
|
||||
// CHK-UBUJOBS-ST: clang-offload-bundler{{.*}}" "-type=o" "-targets=openmp-powerpc64le-ibm-linux-gnu,openmp-x86_64-pc-linux-gnu,host-powerpc64le-unknown-linux" "-output=
|
||||
// CHK-UBUJOBS-ST-SAME: [[RES:[^\\/]+\.o]]" "-input={{.*}}[[T1OBJ]]" "-input={{.*}}[[T2OBJ]]" "-input={{.*}}[[HOSTOBJ]]"
|
||||
|
||||
/// ###########################################################################
|
||||
// RUN: %clang -ccc-print-phases -lsomelib -fopenmp=libomp --target=powerpc64-ibm-linux-gnu \
|
||||
// RUN: -fopenmp-targets=x86_64-pc-linux-gnu,powerpc64-ibm-linux-gnu %s %s 2>&1 | FileCheck -check-prefix=CHK-PHASES-FILES %s
|
||||
// CHK-PHASES-FILES: 0: input, "somelib", object, (host-openmp)
|
||||
// CHK-PHASES-FILES-NEXT: 1: input, "[[INPUT:.+]]", c, (host-openmp)
|
||||
// CHK-PHASES-FILES-NEXT: 2: preprocessor, {1}, cpp-output, (host-openmp)
|
||||
// CHK-PHASES-FILES-NEXT: 3: compiler, {2}, ir, (host-openmp)
|
||||
// CHK-PHASES-FILES-NEXT: 4: input, "[[INPUT]]", c, (device-openmp)
|
||||
// CHK-PHASES-FILES-NEXT: 5: preprocessor, {4}, cpp-output, (device-openmp)
|
||||
// CHK-PHASES-FILES-NEXT: 6: compiler, {5}, ir, (device-openmp)
|
||||
// CHK-PHASES-FILES-NEXT: 7: offload, "host-openmp (powerpc64-ibm-linux-gnu)" {3}, "device-openmp (x86_64-pc-linux-gnu)" {6}, ir
|
||||
// CHK-PHASES-FILES-NEXT: 8: backend, {7}, assembler, (device-openmp)
|
||||
// CHK-PHASES-FILES-NEXT: 9: assembler, {8}, object, (device-openmp)
|
||||
// CHK-PHASES-FILES-NEXT: 10: offload, "device-openmp (x86_64-pc-linux-gnu)" {9}, object
|
||||
// CHK-PHASES-FILES-NEXT: 11: input, "[[INPUT]]", c, (device-openmp)
|
||||
// CHK-PHASES-FILES-NEXT: 12: preprocessor, {11}, cpp-output, (device-openmp)
|
||||
// CHK-PHASES-FILES-NEXT: 13: compiler, {12}, ir, (device-openmp)
|
||||
// CHK-PHASES-FILES-NEXT: 14: offload, "host-openmp (powerpc64-ibm-linux-gnu)" {3}, "device-openmp (powerpc64-ibm-linux-gnu)" {13}, ir
|
||||
// CHK-PHASES-FILES-NEXT: 15: backend, {14}, assembler, (device-openmp)
|
||||
// CHK-PHASES-FILES-NEXT: 16: assembler, {15}, object, (device-openmp)
|
||||
// CHK-PHASES-FILES-NEXT: 17: offload, "device-openmp (powerpc64-ibm-linux-gnu)" {16}, object
|
||||
// CHK-PHASES-FILES-NEXT: 18: clang-offload-packager, {10, 17}, image
|
||||
// CHK-PHASES-FILES-NEXT: 19: offload, "host-openmp (powerpc64-ibm-linux-gnu)" {3}, " (powerpc64-ibm-linux-gnu)" {18}, ir
|
||||
// CHK-PHASES-FILES-NEXT: 20: backend, {19}, assembler, (host-openmp)
|
||||
// CHK-PHASES-FILES-NEXT: 21: assembler, {20}, object, (host-openmp)
|
||||
// CHK-PHASES-FILES-NEXT: 22: input, "[[INPUT]]", c, (host-openmp)
|
||||
// CHK-PHASES-FILES-NEXT: 23: preprocessor, {22}, cpp-output, (host-openmp)
|
||||
// CHK-PHASES-FILES-NEXT: 24: compiler, {23}, ir, (host-openmp)
|
||||
// CHK-PHASES-FILES-NEXT: 25: input, "[[INPUT]]", c, (device-openmp)
|
||||
// CHK-PHASES-FILES-NEXT: 26: preprocessor, {25}, cpp-output, (device-openmp)
|
||||
// CHK-PHASES-FILES-NEXT: 27: compiler, {26}, ir, (device-openmp)
|
||||
// CHK-PHASES-FILES-NEXT: 28: offload, "host-openmp (powerpc64-ibm-linux-gnu)" {24}, "device-openmp (x86_64-pc-linux-gnu)" {27}, ir
|
||||
// CHK-PHASES-FILES-NEXT: 29: backend, {28}, assembler, (device-openmp)
|
||||
// CHK-PHASES-FILES-NEXT: 30: assembler, {29}, object, (device-openmp)
|
||||
// CHK-PHASES-FILES-NEXT: 31: offload, "device-openmp (x86_64-pc-linux-gnu)" {30}, object
|
||||
// CHK-PHASES-FILES-NEXT: 32: input, "[[INPUT]]", c, (device-openmp)
|
||||
// CHK-PHASES-FILES-NEXT: 33: preprocessor, {32}, cpp-output, (device-openmp)
|
||||
// CHK-PHASES-FILES-NEXT: 34: compiler, {33}, ir, (device-openmp)
|
||||
// CHK-PHASES-FILES-NEXT: 35: offload, "host-openmp (powerpc64-ibm-linux-gnu)" {24}, "device-openmp (powerpc64-ibm-linux-gnu)" {34}, ir
|
||||
// CHK-PHASES-FILES-NEXT: 36: backend, {35}, assembler, (device-openmp)
|
||||
// CHK-PHASES-FILES-NEXT: 37: assembler, {36}, object, (device-openmp)
|
||||
// CHK-PHASES-FILES-NEXT: 38: offload, "device-openmp (powerpc64-ibm-linux-gnu)" {37}, object
|
||||
// CHK-PHASES-FILES-NEXT: 39: clang-offload-packager, {31, 38}, image
|
||||
// CHK-PHASES-FILES-NEXT: 40: offload, "host-openmp (powerpc64-ibm-linux-gnu)" {24}, " (powerpc64-ibm-linux-gnu)" {39}, ir
|
||||
// CHK-PHASES-FILES-NEXT: 41: backend, {40}, assembler, (host-openmp)
|
||||
// CHK-PHASES-FILES-NEXT: 42: assembler, {41}, object, (host-openmp)
|
||||
// CHK-PHASES-FILES-NEXT: 43: clang-linker-wrapper, {0, 21, 42}, image, (host-openmp)
|
||||
|
||||
/// Check -fopenmp-is-device is passed when compiling for the device.
|
||||
// RUN: %clang -### --target=powerpc64le-linux -fopenmp=libomp -fopenmp-targets=powerpc64le-ibm-linux-gnu %s 2>&1 \
|
||||
|
@ -658,7 +173,7 @@
|
|||
// CHK-FOPENMP-IS-DEVICE: "-cc1"{{.*}} "-aux-triple" "powerpc64le-unknown-linux" {{.*}}"-fopenmp-is-device" "-fopenmp-host-ir-file-path" {{.*}}.c"
|
||||
|
||||
/// Check arguments to the linker wrapper
|
||||
// RUN: %clang -### --target=powerpc64le-linux -fopenmp=libomp -fopenmp-targets=powerpc64le-ibm-linux-gnu -fopenmp-new-driver %s 2>&1 \
|
||||
// RUN: %clang -### --target=powerpc64le-linux -fopenmp=libomp -fopenmp-targets=powerpc64le-ibm-linux-gnu %s 2>&1 \
|
||||
// RUN: | FileCheck -check-prefix=CHK-NEW-DRIVER %s
|
||||
|
||||
// CHK-NEW-DRIVER: clang-linker-wrapper{{.*}}"--host-triple=powerpc64le-unknown-linux"{{.*}}--{{.*}}"-lomp"{{.*}}"-lomptarget"
|
||||
|
|
|
@ -8,11 +8,9 @@ add_clang_subdirectory(clang-format)
|
|||
add_clang_subdirectory(clang-format-vs)
|
||||
add_clang_subdirectory(clang-fuzzer)
|
||||
add_clang_subdirectory(clang-import-test)
|
||||
add_clang_subdirectory(clang-nvlink-wrapper)
|
||||
add_clang_subdirectory(clang-linker-wrapper)
|
||||
add_clang_subdirectory(clang-offload-packager)
|
||||
add_clang_subdirectory(clang-offload-bundler)
|
||||
add_clang_subdirectory(clang-offload-wrapper)
|
||||
add_clang_subdirectory(clang-scan-deps)
|
||||
if(HAVE_CLANG_REPL_SUPPORT)
|
||||
add_clang_subdirectory(clang-repl)
|
||||
|
|
|
@ -1,25 +0,0 @@
|
|||
set(LLVM_LINK_COMPONENTS BitWriter Core Object Support)
|
||||
|
||||
if(NOT CLANG_BUILT_STANDALONE)
|
||||
set(tablegen_deps intrinsics_gen)
|
||||
endif()
|
||||
|
||||
add_clang_executable(clang-nvlink-wrapper
|
||||
ClangNvlinkWrapper.cpp
|
||||
|
||||
DEPENDS
|
||||
${tablegen_deps}
|
||||
)
|
||||
|
||||
set(CLANG_NVLINK_WRAPPER_LIB_DEPS
|
||||
clangBasic
|
||||
)
|
||||
|
||||
add_dependencies(clang clang-nvlink-wrapper)
|
||||
|
||||
target_link_libraries(clang-nvlink-wrapper
|
||||
PRIVATE
|
||||
${CLANG_NVLINK_WRAPPER_LIB_DEPS}
|
||||
)
|
||||
|
||||
install(TARGETS clang-nvlink-wrapper RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}")
|
|
@ -1,206 +0,0 @@
|
|||
//===-- clang-nvlink-wrapper/ClangNvlinkWrapper.cpp - wrapper over nvlink-===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===---------------------------------------------------------------------===//
|
||||
///
|
||||
/// \file
|
||||
/// This tool works as a wrapper over nvlink program. It transparently passes
|
||||
/// every input option and objects to nvlink except archive files. It reads
|
||||
/// each input archive file to extract archived cubin files as temporary files.
|
||||
/// These temp (*.cubin) files are passed to nvlink, because nvlink does not
|
||||
/// support linking of archive files implicitly.
|
||||
///
|
||||
/// During linking of heterogeneous device archive libraries, the
|
||||
/// clang-offload-bundler creates a device specific archive of cubin files.
|
||||
/// Such an archive is then passed to this tool to extract cubin files before
|
||||
/// passing to nvlink.
|
||||
///
|
||||
/// Example:
|
||||
/// clang-nvlink-wrapper -o a.out-openmp-nvptx64 /tmp/libTest-nvptx-sm_50.a
|
||||
///
|
||||
/// 1. Extract (libTest-nvptx-sm_50.a) => /tmp/a.cubin /tmp/b.cubin
|
||||
/// 2. nvlink -o a.out-openmp-nvptx64 /tmp/a.cubin /tmp/b.cubin
|
||||
//===---------------------------------------------------------------------===//
|
||||
|
||||
#include "clang/Basic/Version.h"
|
||||
#include "llvm/Object/Archive.h"
|
||||
#include "llvm/Support/CommandLine.h"
|
||||
#include "llvm/Support/Errc.h"
|
||||
#include "llvm/Support/FileSystem.h"
|
||||
#include "llvm/Support/MemoryBuffer.h"
|
||||
#include "llvm/Support/Path.h"
|
||||
#include "llvm/Support/Program.h"
|
||||
#include "llvm/Support/Signals.h"
|
||||
#include "llvm/Support/StringSaver.h"
|
||||
#include "llvm/Support/WithColor.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
static cl::opt<bool> Help("h", cl::desc("Alias for -help"), cl::Hidden);
|
||||
|
||||
// Mark all our options with this category, everything else (except for -help)
|
||||
// will be hidden.
|
||||
static cl::OptionCategory
|
||||
ClangNvlinkWrapperCategory("clang-nvlink-wrapper options");
|
||||
|
||||
static cl::opt<std::string> NvlinkUserPath("nvlink-path",
|
||||
cl::desc("Path of nvlink binary"),
|
||||
cl::cat(ClangNvlinkWrapperCategory));
|
||||
|
||||
// Do not parse nvlink options
|
||||
static cl::list<std::string>
|
||||
NVArgs(cl::Sink, cl::desc("<options to be passed to nvlink>..."));
|
||||
|
||||
static bool isEmptyFile(StringRef Filename) {
|
||||
ErrorOr<std::unique_ptr<MemoryBuffer>> BufOrErr =
|
||||
MemoryBuffer::getFileOrSTDIN(Filename, false, false);
|
||||
if (std::error_code EC = BufOrErr.getError())
|
||||
return false;
|
||||
return (*BufOrErr)->getBuffer().empty();
|
||||
}
|
||||
|
||||
static Error runNVLink(std::string NVLinkPath,
|
||||
SmallVectorImpl<std::string> &Args) {
|
||||
std::vector<StringRef> NVLArgs;
|
||||
NVLArgs.push_back(NVLinkPath);
|
||||
StringRef Output = *(llvm::find(Args, "-o") + 1);
|
||||
for (auto &Arg : Args) {
|
||||
if (!(sys::fs::exists(Arg) && Arg != Output && isEmptyFile(Arg)))
|
||||
NVLArgs.push_back(Arg);
|
||||
}
|
||||
|
||||
if (sys::ExecuteAndWait(NVLinkPath, NVLArgs))
|
||||
return createStringError(inconvertibleErrorCode(), "'nvlink' failed");
|
||||
return Error::success();
|
||||
}
|
||||
|
||||
static Error extractArchiveFiles(StringRef Filename,
|
||||
SmallVectorImpl<std::string> &Args,
|
||||
SmallVectorImpl<std::string> &TmpFiles) {
|
||||
std::vector<std::unique_ptr<MemoryBuffer>> ArchiveBuffers;
|
||||
|
||||
ErrorOr<std::unique_ptr<MemoryBuffer>> BufOrErr =
|
||||
MemoryBuffer::getFileOrSTDIN(Filename, false, false);
|
||||
if (std::error_code EC = BufOrErr.getError())
|
||||
return createFileError(Filename, EC);
|
||||
|
||||
ArchiveBuffers.push_back(std::move(*BufOrErr));
|
||||
Expected<std::unique_ptr<llvm::object::Archive>> LibOrErr =
|
||||
object::Archive::create(ArchiveBuffers.back()->getMemBufferRef());
|
||||
if (!LibOrErr)
|
||||
return LibOrErr.takeError();
|
||||
|
||||
auto Archive = std::move(*LibOrErr);
|
||||
|
||||
Error Err = Error::success();
|
||||
auto ChildEnd = Archive->child_end();
|
||||
for (auto ChildIter = Archive->child_begin(Err); ChildIter != ChildEnd;
|
||||
++ChildIter) {
|
||||
if (Err)
|
||||
return Err;
|
||||
auto ChildNameOrErr = (*ChildIter).getName();
|
||||
if (!ChildNameOrErr)
|
||||
return ChildNameOrErr.takeError();
|
||||
|
||||
StringRef ChildName = sys::path::filename(ChildNameOrErr.get());
|
||||
|
||||
auto ChildBufferRefOrErr = (*ChildIter).getMemoryBufferRef();
|
||||
if (!ChildBufferRefOrErr)
|
||||
return ChildBufferRefOrErr.takeError();
|
||||
|
||||
auto ChildBuffer =
|
||||
MemoryBuffer::getMemBuffer(ChildBufferRefOrErr.get(), false);
|
||||
auto ChildNameSplit = ChildName.split('.');
|
||||
|
||||
SmallString<16> Path;
|
||||
int FileDesc;
|
||||
if (std::error_code EC = sys::fs::createTemporaryFile(
|
||||
(ChildNameSplit.first), (ChildNameSplit.second), FileDesc, Path))
|
||||
return createFileError(ChildName, EC);
|
||||
|
||||
std::string TmpFileName(Path.str());
|
||||
Args.push_back(TmpFileName);
|
||||
TmpFiles.push_back(TmpFileName);
|
||||
std::error_code EC;
|
||||
raw_fd_ostream OS(Path.c_str(), EC, sys::fs::OF_None);
|
||||
if (EC)
|
||||
return createFileError(TmpFileName, errc::io_error);
|
||||
OS << ChildBuffer->getBuffer();
|
||||
OS.close();
|
||||
}
|
||||
return Err;
|
||||
}
|
||||
|
||||
static Error cleanupTmpFiles(SmallVectorImpl<std::string> &TmpFiles) {
|
||||
for (auto &TmpFile : TmpFiles) {
|
||||
if (std::error_code EC = sys::fs::remove(TmpFile))
|
||||
return createFileError(TmpFile, errc::no_such_file_or_directory);
|
||||
}
|
||||
return Error::success();
|
||||
}
|
||||
|
||||
static void PrintVersion(raw_ostream &OS) {
|
||||
OS << clang::getClangToolFullVersion("clang-nvlink-wrapper") << '\n';
|
||||
}
|
||||
|
||||
int main(int argc, const char **argv) {
|
||||
sys::PrintStackTraceOnErrorSignal(argv[0]);
|
||||
cl::SetVersionPrinter(PrintVersion);
|
||||
cl::HideUnrelatedOptions(ClangNvlinkWrapperCategory);
|
||||
cl::ParseCommandLineOptions(
|
||||
argc, argv,
|
||||
"A wrapper tool over nvlink program. It transparently passes every \n"
|
||||
"input option and objects to nvlink except archive files and path of \n"
|
||||
"nvlink binary. It reads each input archive file to extract archived \n"
|
||||
"cubin files as temporary files.\n");
|
||||
|
||||
if (Help) {
|
||||
cl::PrintHelpMessage();
|
||||
return 0;
|
||||
}
|
||||
|
||||
auto reportError = [argv](Error E) {
|
||||
logAllUnhandledErrors(std::move(E), WithColor::error(errs(), argv[0]));
|
||||
exit(1);
|
||||
};
|
||||
|
||||
std::string NvlinkPath;
|
||||
SmallVector<const char *, 0> Argv(argv, argv + argc);
|
||||
SmallVector<std::string, 0> ArgvSubst;
|
||||
SmallVector<std::string, 0> TmpFiles;
|
||||
BumpPtrAllocator Alloc;
|
||||
StringSaver Saver(Alloc);
|
||||
cl::ExpandResponseFiles(Saver, cl::TokenizeGNUCommandLine, Argv);
|
||||
|
||||
for (const std::string &Arg : NVArgs) {
|
||||
if (sys::path::extension(Arg) == ".a") {
|
||||
if (Error Err = extractArchiveFiles(Arg, ArgvSubst, TmpFiles))
|
||||
reportError(std::move(Err));
|
||||
} else {
|
||||
ArgvSubst.push_back(Arg);
|
||||
}
|
||||
}
|
||||
|
||||
NvlinkPath = NvlinkUserPath;
|
||||
|
||||
// If user hasn't specified nvlink binary then search it in PATH
|
||||
if (NvlinkPath.empty()) {
|
||||
ErrorOr<std::string> NvlinkPathErr = sys::findProgramByName("nvlink");
|
||||
if (!NvlinkPathErr) {
|
||||
reportError(createStringError(NvlinkPathErr.getError(),
|
||||
"unable to find 'nvlink' in path"));
|
||||
}
|
||||
NvlinkPath = NvlinkPathErr.get();
|
||||
}
|
||||
|
||||
if (Error Err = runNVLink(NvlinkPath, ArgvSubst))
|
||||
reportError(std::move(Err));
|
||||
if (Error Err = cleanupTmpFiles(TmpFiles))
|
||||
reportError(std::move(Err));
|
||||
|
||||
return 0;
|
||||
}
|
|
@ -1,19 +0,0 @@
|
|||
set(LLVM_LINK_COMPONENTS BitWriter Core Object Support TransformUtils)
|
||||
|
||||
add_clang_tool(clang-offload-wrapper
|
||||
ClangOffloadWrapper.cpp
|
||||
|
||||
DEPENDS
|
||||
intrinsics_gen
|
||||
)
|
||||
|
||||
set(CLANG_OFFLOAD_WRAPPER_LIB_DEPS
|
||||
clangBasic
|
||||
)
|
||||
|
||||
add_dependencies(clang clang-offload-wrapper)
|
||||
|
||||
clang_target_link_libraries(clang-offload-wrapper
|
||||
PRIVATE
|
||||
${CLANG_OFFLOAD_WRAPPER_LIB_DEPS}
|
||||
)
|
|
@ -1,666 +0,0 @@
|
|||
//===-- clang-offload-wrapper/ClangOffloadWrapper.cpp -----------*- C++ -*-===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
///
|
||||
/// \file
|
||||
/// Implementation of the offload wrapper tool. It takes offload target binaries
|
||||
/// as input and creates wrapper bitcode file containing target binaries
|
||||
/// packaged as data. Wrapper bitcode also includes initialization code which
|
||||
/// registers target binaries in offloading runtime at program startup.
|
||||
///
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "clang/Basic/Version.h"
|
||||
#include "llvm/ADT/ArrayRef.h"
|
||||
#include "llvm/ADT/Triple.h"
|
||||
#include "llvm/BinaryFormat/ELF.h"
|
||||
#include "llvm/Bitcode/BitcodeWriter.h"
|
||||
#include "llvm/IR/Constants.h"
|
||||
#include "llvm/IR/GlobalVariable.h"
|
||||
#include "llvm/IR/IRBuilder.h"
|
||||
#include "llvm/IR/LLVMContext.h"
|
||||
#include "llvm/IR/Module.h"
|
||||
#include "llvm/Object/ELFObjectFile.h"
|
||||
#include "llvm/Object/ObjectFile.h"
|
||||
#include "llvm/Support/CommandLine.h"
|
||||
#include "llvm/Support/EndianStream.h"
|
||||
#include "llvm/Support/Errc.h"
|
||||
#include "llvm/Support/Error.h"
|
||||
#include "llvm/Support/ErrorOr.h"
|
||||
#include "llvm/Support/FileSystem.h"
|
||||
#include "llvm/Support/MemoryBuffer.h"
|
||||
#include "llvm/Support/Path.h"
|
||||
#include "llvm/Support/Program.h"
|
||||
#include "llvm/Support/Signals.h"
|
||||
#include "llvm/Support/ToolOutputFile.h"
|
||||
#include "llvm/Support/VCSRevision.h"
|
||||
#include "llvm/Support/WithColor.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
#include "llvm/Transforms/Utils/ModuleUtils.h"
|
||||
#include <cassert>
|
||||
#include <cstdint>
|
||||
|
||||
#define OPENMP_OFFLOAD_IMAGE_VERSION "1.0"
|
||||
|
||||
using namespace llvm;
|
||||
using namespace llvm::object;
|
||||
|
||||
static cl::opt<bool> Help("h", cl::desc("Alias for -help"), cl::Hidden);
|
||||
|
||||
// Mark all our options with this category, everything else (except for -version
|
||||
// and -help) will be hidden.
|
||||
static cl::OptionCategory
|
||||
ClangOffloadWrapperCategory("clang-offload-wrapper options");
|
||||
|
||||
static cl::opt<std::string> Output("o", cl::Required,
|
||||
cl::desc("Output filename"),
|
||||
cl::value_desc("filename"),
|
||||
cl::cat(ClangOffloadWrapperCategory));
|
||||
|
||||
static cl::list<std::string> Inputs(cl::Positional, cl::OneOrMore,
|
||||
cl::desc("<input files>"),
|
||||
cl::cat(ClangOffloadWrapperCategory));
|
||||
|
||||
static cl::opt<std::string>
|
||||
Target("target", cl::Required,
|
||||
cl::desc("Target triple for the output module"),
|
||||
cl::value_desc("triple"), cl::cat(ClangOffloadWrapperCategory));
|
||||
|
||||
static cl::opt<bool> SaveTemps(
|
||||
"save-temps",
|
||||
cl::desc("Save temporary files that may be produced by the tool. "
|
||||
"This option forces print-out of the temporary files' names."),
|
||||
cl::Hidden);
|
||||
|
||||
static cl::opt<bool> AddOpenMPOffloadNotes(
|
||||
"add-omp-offload-notes",
|
||||
cl::desc("Add LLVMOMPOFFLOAD ELF notes to ELF device images."), cl::Hidden);
|
||||
|
||||
namespace {
|
||||
|
||||
class BinaryWrapper {
|
||||
LLVMContext C;
|
||||
Module M;
|
||||
|
||||
StructType *EntryTy = nullptr;
|
||||
StructType *ImageTy = nullptr;
|
||||
StructType *DescTy = nullptr;
|
||||
|
||||
std::string ToolName;
|
||||
std::string ObjcopyPath;
|
||||
// Temporary file names that may be created during adding notes
|
||||
// to ELF offload images. Use -save-temps to keep them and also
|
||||
// see their names. A temporary file's name includes the name
|
||||
// of the original input ELF image, so you can easily match
|
||||
// them, if you have multiple inputs.
|
||||
std::vector<std::string> TempFiles;
|
||||
|
||||
private:
|
||||
IntegerType *getSizeTTy() {
|
||||
switch (M.getDataLayout().getPointerTypeSize(Type::getInt8PtrTy(C))) {
|
||||
case 4u:
|
||||
return Type::getInt32Ty(C);
|
||||
case 8u:
|
||||
return Type::getInt64Ty(C);
|
||||
}
|
||||
llvm_unreachable("unsupported pointer type size");
|
||||
}
|
||||
|
||||
// struct __tgt_offload_entry {
|
||||
// void *addr;
|
||||
// char *name;
|
||||
// size_t size;
|
||||
// int32_t flags;
|
||||
// int32_t reserved;
|
||||
// };
|
||||
StructType *getEntryTy() {
|
||||
if (!EntryTy)
|
||||
EntryTy = StructType::create("__tgt_offload_entry", Type::getInt8PtrTy(C),
|
||||
Type::getInt8PtrTy(C), getSizeTTy(),
|
||||
Type::getInt32Ty(C), Type::getInt32Ty(C));
|
||||
return EntryTy;
|
||||
}
|
||||
|
||||
PointerType *getEntryPtrTy() { return PointerType::getUnqual(getEntryTy()); }
|
||||
|
||||
// struct __tgt_device_image {
|
||||
// void *ImageStart;
|
||||
// void *ImageEnd;
|
||||
// __tgt_offload_entry *EntriesBegin;
|
||||
// __tgt_offload_entry *EntriesEnd;
|
||||
// };
|
||||
StructType *getDeviceImageTy() {
|
||||
if (!ImageTy)
|
||||
ImageTy = StructType::create("__tgt_device_image", Type::getInt8PtrTy(C),
|
||||
Type::getInt8PtrTy(C), getEntryPtrTy(),
|
||||
getEntryPtrTy());
|
||||
return ImageTy;
|
||||
}
|
||||
|
||||
PointerType *getDeviceImagePtrTy() {
|
||||
return PointerType::getUnqual(getDeviceImageTy());
|
||||
}
|
||||
|
||||
// struct __tgt_bin_desc {
|
||||
// int32_t NumDeviceImages;
|
||||
// __tgt_device_image *DeviceImages;
|
||||
// __tgt_offload_entry *HostEntriesBegin;
|
||||
// __tgt_offload_entry *HostEntriesEnd;
|
||||
// };
|
||||
StructType *getBinDescTy() {
|
||||
if (!DescTy)
|
||||
DescTy = StructType::create("__tgt_bin_desc", Type::getInt32Ty(C),
|
||||
getDeviceImagePtrTy(), getEntryPtrTy(),
|
||||
getEntryPtrTy());
|
||||
return DescTy;
|
||||
}
|
||||
|
||||
PointerType *getBinDescPtrTy() {
|
||||
return PointerType::getUnqual(getBinDescTy());
|
||||
}
|
||||
|
||||
/// Creates binary descriptor for the given device images. Binary descriptor
|
||||
/// is an object that is passed to the offloading runtime at program startup
|
||||
/// and it describes all device images available in the executable or shared
|
||||
/// library. It is defined as follows
|
||||
///
|
||||
/// __attribute__((visibility("hidden")))
|
||||
/// extern __tgt_offload_entry *__start_omp_offloading_entries;
|
||||
/// __attribute__((visibility("hidden")))
|
||||
/// extern __tgt_offload_entry *__stop_omp_offloading_entries;
|
||||
///
|
||||
/// static const char Image0[] = { <Bufs.front() contents> };
|
||||
/// ...
|
||||
/// static const char ImageN[] = { <Bufs.back() contents> };
|
||||
///
|
||||
/// static const __tgt_device_image Images[] = {
|
||||
/// {
|
||||
/// Image0, /*ImageStart*/
|
||||
/// Image0 + sizeof(Image0), /*ImageEnd*/
|
||||
/// __start_omp_offloading_entries, /*EntriesBegin*/
|
||||
/// __stop_omp_offloading_entries /*EntriesEnd*/
|
||||
/// },
|
||||
/// ...
|
||||
/// {
|
||||
/// ImageN, /*ImageStart*/
|
||||
/// ImageN + sizeof(ImageN), /*ImageEnd*/
|
||||
/// __start_omp_offloading_entries, /*EntriesBegin*/
|
||||
/// __stop_omp_offloading_entries /*EntriesEnd*/
|
||||
/// }
|
||||
/// };
|
||||
///
|
||||
/// static const __tgt_bin_desc BinDesc = {
|
||||
/// sizeof(Images) / sizeof(Images[0]), /*NumDeviceImages*/
|
||||
/// Images, /*DeviceImages*/
|
||||
/// __start_omp_offloading_entries, /*HostEntriesBegin*/
|
||||
/// __stop_omp_offloading_entries /*HostEntriesEnd*/
|
||||
/// };
|
||||
///
|
||||
/// Global variable that represents BinDesc is returned.
|
||||
GlobalVariable *createBinDesc(ArrayRef<ArrayRef<char>> Bufs) {
|
||||
// Create external begin/end symbols for the offload entries table.
|
||||
auto *EntriesB = new GlobalVariable(
|
||||
M, getEntryTy(), /*isConstant*/ true, GlobalValue::ExternalLinkage,
|
||||
/*Initializer*/ nullptr, "__start_omp_offloading_entries");
|
||||
EntriesB->setVisibility(GlobalValue::HiddenVisibility);
|
||||
auto *EntriesE = new GlobalVariable(
|
||||
M, getEntryTy(), /*isConstant*/ true, GlobalValue::ExternalLinkage,
|
||||
/*Initializer*/ nullptr, "__stop_omp_offloading_entries");
|
||||
EntriesE->setVisibility(GlobalValue::HiddenVisibility);
|
||||
|
||||
// We assume that external begin/end symbols that we have created above will
|
||||
// be defined by the linker. But linker will do that only if linker inputs
|
||||
// have section with "omp_offloading_entries" name which is not guaranteed.
|
||||
// So, we just create dummy zero sized object in the offload entries section
|
||||
// to force linker to define those symbols.
|
||||
auto *DummyInit =
|
||||
ConstantAggregateZero::get(ArrayType::get(getEntryTy(), 0u));
|
||||
auto *DummyEntry = new GlobalVariable(
|
||||
M, DummyInit->getType(), true, GlobalVariable::ExternalLinkage,
|
||||
DummyInit, "__dummy.omp_offloading.entry");
|
||||
DummyEntry->setSection("omp_offloading_entries");
|
||||
DummyEntry->setVisibility(GlobalValue::HiddenVisibility);
|
||||
|
||||
auto *Zero = ConstantInt::get(getSizeTTy(), 0u);
|
||||
Constant *ZeroZero[] = {Zero, Zero};
|
||||
|
||||
// Create initializer for the images array.
|
||||
SmallVector<Constant *, 4u> ImagesInits;
|
||||
ImagesInits.reserve(Bufs.size());
|
||||
for (ArrayRef<char> Buf : Bufs) {
|
||||
auto *Data = ConstantDataArray::get(C, Buf);
|
||||
auto *Image = new GlobalVariable(M, Data->getType(), /*isConstant*/ true,
|
||||
GlobalVariable::InternalLinkage, Data,
|
||||
".omp_offloading.device_image");
|
||||
Image->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
|
||||
|
||||
auto *Size = ConstantInt::get(getSizeTTy(), Buf.size());
|
||||
Constant *ZeroSize[] = {Zero, Size};
|
||||
|
||||
auto *ImageB = ConstantExpr::getGetElementPtr(Image->getValueType(),
|
||||
Image, ZeroZero);
|
||||
auto *ImageE = ConstantExpr::getGetElementPtr(Image->getValueType(),
|
||||
Image, ZeroSize);
|
||||
|
||||
ImagesInits.push_back(ConstantStruct::get(getDeviceImageTy(), ImageB,
|
||||
ImageE, EntriesB, EntriesE));
|
||||
}
|
||||
|
||||
// Then create images array.
|
||||
auto *ImagesData = ConstantArray::get(
|
||||
ArrayType::get(getDeviceImageTy(), ImagesInits.size()), ImagesInits);
|
||||
|
||||
auto *Images =
|
||||
new GlobalVariable(M, ImagesData->getType(), /*isConstant*/ true,
|
||||
GlobalValue::InternalLinkage, ImagesData,
|
||||
".omp_offloading.device_images");
|
||||
Images->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
|
||||
|
||||
auto *ImagesB = ConstantExpr::getGetElementPtr(Images->getValueType(),
|
||||
Images, ZeroZero);
|
||||
|
||||
// And finally create the binary descriptor object.
|
||||
auto *DescInit = ConstantStruct::get(
|
||||
getBinDescTy(),
|
||||
ConstantInt::get(Type::getInt32Ty(C), ImagesInits.size()), ImagesB,
|
||||
EntriesB, EntriesE);
|
||||
|
||||
return new GlobalVariable(M, DescInit->getType(), /*isConstant*/ true,
|
||||
GlobalValue::InternalLinkage, DescInit,
|
||||
".omp_offloading.descriptor");
|
||||
}
|
||||
|
||||
void createRegisterFunction(GlobalVariable *BinDesc) {
|
||||
auto *FuncTy = FunctionType::get(Type::getVoidTy(C), /*isVarArg*/ false);
|
||||
auto *Func = Function::Create(FuncTy, GlobalValue::InternalLinkage,
|
||||
".omp_offloading.descriptor_reg", &M);
|
||||
Func->setSection(".text.startup");
|
||||
|
||||
// Get __tgt_register_lib function declaration.
|
||||
auto *RegFuncTy = FunctionType::get(Type::getVoidTy(C), getBinDescPtrTy(),
|
||||
/*isVarArg*/ false);
|
||||
FunctionCallee RegFuncC =
|
||||
M.getOrInsertFunction("__tgt_register_lib", RegFuncTy);
|
||||
|
||||
// Construct function body
|
||||
IRBuilder<> Builder(BasicBlock::Create(C, "entry", Func));
|
||||
Builder.CreateCall(RegFuncC, BinDesc);
|
||||
Builder.CreateRetVoid();
|
||||
|
||||
// Add this function to constructors.
|
||||
// Set priority to 1 so that __tgt_register_lib is executed AFTER
|
||||
// __tgt_register_requires (we want to know what requirements have been
|
||||
// asked for before we load a libomptarget plugin so that by the time the
|
||||
// plugin is loaded it can report how many devices there are which can
|
||||
// satisfy these requirements).
|
||||
appendToGlobalCtors(M, Func, /*Priority*/ 1);
|
||||
}
|
||||
|
||||
void createUnregisterFunction(GlobalVariable *BinDesc) {
|
||||
auto *FuncTy = FunctionType::get(Type::getVoidTy(C), /*isVarArg*/ false);
|
||||
auto *Func = Function::Create(FuncTy, GlobalValue::InternalLinkage,
|
||||
".omp_offloading.descriptor_unreg", &M);
|
||||
Func->setSection(".text.startup");
|
||||
|
||||
// Get __tgt_unregister_lib function declaration.
|
||||
auto *UnRegFuncTy = FunctionType::get(Type::getVoidTy(C), getBinDescPtrTy(),
|
||||
/*isVarArg*/ false);
|
||||
FunctionCallee UnRegFuncC =
|
||||
M.getOrInsertFunction("__tgt_unregister_lib", UnRegFuncTy);
|
||||
|
||||
// Construct function body
|
||||
IRBuilder<> Builder(BasicBlock::Create(C, "entry", Func));
|
||||
Builder.CreateCall(UnRegFuncC, BinDesc);
|
||||
Builder.CreateRetVoid();
|
||||
|
||||
// Add this function to global destructors.
|
||||
// Match priority of __tgt_register_lib
|
||||
appendToGlobalDtors(M, Func, /*Priority*/ 1);
|
||||
}
|
||||
|
||||
public:
|
||||
BinaryWrapper(StringRef Target, StringRef ToolName)
|
||||
: M("offload.wrapper.object", C), ToolName(ToolName) {
|
||||
M.setTargetTriple(Target);
|
||||
// Look for llvm-objcopy in the same directory, from which
|
||||
// clang-offload-wrapper is invoked. This helps OpenMP offload
|
||||
// LIT tests.
|
||||
|
||||
// This just needs to be some symbol in the binary; C++ doesn't
|
||||
// allow taking the address of ::main however.
|
||||
void *P = (void *)(intptr_t)&Help;
|
||||
std::string COWPath = sys::fs::getMainExecutable(ToolName.str().c_str(), P);
|
||||
if (!COWPath.empty()) {
|
||||
auto COWDir = sys::path::parent_path(COWPath);
|
||||
ErrorOr<std::string> ObjcopyPathOrErr =
|
||||
sys::findProgramByName("llvm-objcopy", {COWDir});
|
||||
if (ObjcopyPathOrErr) {
|
||||
ObjcopyPath = *ObjcopyPathOrErr;
|
||||
return;
|
||||
}
|
||||
|
||||
// Otherwise, look through PATH environment.
|
||||
}
|
||||
|
||||
ErrorOr<std::string> ObjcopyPathOrErr =
|
||||
sys::findProgramByName("llvm-objcopy");
|
||||
if (!ObjcopyPathOrErr) {
|
||||
WithColor::warning(errs(), ToolName)
|
||||
<< "cannot find llvm-objcopy[.exe] in PATH; ELF notes cannot be "
|
||||
"added.\n";
|
||||
return;
|
||||
}
|
||||
|
||||
ObjcopyPath = *ObjcopyPathOrErr;
|
||||
}
|
||||
|
||||
~BinaryWrapper() {
|
||||
if (TempFiles.empty())
|
||||
return;
|
||||
|
||||
StringRef ToolNameRef(ToolName);
|
||||
auto warningOS = [ToolNameRef]() -> raw_ostream & {
|
||||
return WithColor::warning(errs(), ToolNameRef);
|
||||
};
|
||||
|
||||
for (auto &F : TempFiles) {
|
||||
if (SaveTemps) {
|
||||
warningOS() << "keeping temporary file " << F << "\n";
|
||||
continue;
|
||||
}
|
||||
|
||||
auto EC = sys::fs::remove(F, false);
|
||||
if (EC)
|
||||
warningOS() << "cannot remove temporary file " << F << ": "
|
||||
<< EC.message().c_str() << "\n";
|
||||
}
|
||||
}
|
||||
|
||||
const Module &wrapBinaries(ArrayRef<ArrayRef<char>> Binaries) {
|
||||
GlobalVariable *Desc = createBinDesc(Binaries);
|
||||
assert(Desc && "no binary descriptor");
|
||||
createRegisterFunction(Desc);
|
||||
createUnregisterFunction(Desc);
|
||||
return M;
|
||||
}
|
||||
|
||||
std::unique_ptr<MemoryBuffer> addELFNotes(std::unique_ptr<MemoryBuffer> Buf,
|
||||
StringRef OriginalFileName) {
|
||||
// Cannot add notes, if llvm-objcopy is not available.
|
||||
//
|
||||
// I did not find a clean way to add a new notes section into an existing
|
||||
// ELF file. llvm-objcopy seems to recreate a new ELF from scratch,
|
||||
// and we just try to use llvm-objcopy here.
|
||||
if (ObjcopyPath.empty())
|
||||
return Buf;
|
||||
|
||||
StringRef ToolNameRef(ToolName);
|
||||
|
||||
// Helpers to emit warnings.
|
||||
auto warningOS = [ToolNameRef]() -> raw_ostream & {
|
||||
return WithColor::warning(errs(), ToolNameRef);
|
||||
};
|
||||
auto handleErrorAsWarning = [&warningOS](Error E) {
|
||||
logAllUnhandledErrors(std::move(E), warningOS());
|
||||
};
|
||||
|
||||
Expected<std::unique_ptr<ObjectFile>> BinOrErr =
|
||||
ObjectFile::createELFObjectFile(Buf->getMemBufferRef(),
|
||||
/*InitContent=*/false);
|
||||
if (Error E = BinOrErr.takeError()) {
|
||||
consumeError(std::move(E));
|
||||
// This warning is questionable, but let it be here,
|
||||
// assuming that most OpenMP offload models use ELF offload images.
|
||||
warningOS() << OriginalFileName
|
||||
<< " is not an ELF image, so notes cannot be added to it.\n";
|
||||
return Buf;
|
||||
}
|
||||
|
||||
// If we fail to add the note section, we just pass through the original
|
||||
// ELF image for wrapping. At some point we should enforce the note section
|
||||
// and start emitting errors vs warnings.
|
||||
support::endianness Endianness;
|
||||
if (isa<ELF64LEObjectFile>(BinOrErr->get()) ||
|
||||
isa<ELF32LEObjectFile>(BinOrErr->get())) {
|
||||
Endianness = support::little;
|
||||
} else if (isa<ELF64BEObjectFile>(BinOrErr->get()) ||
|
||||
isa<ELF32BEObjectFile>(BinOrErr->get())) {
|
||||
Endianness = support::big;
|
||||
} else {
|
||||
warningOS() << OriginalFileName
|
||||
<< " is an ELF image of unrecognized format.\n";
|
||||
return Buf;
|
||||
}
|
||||
|
||||
// Create temporary file for the data of a new SHT_NOTE section.
|
||||
// We fill it in with data and then pass to llvm-objcopy invocation
|
||||
// for reading.
|
||||
Twine NotesFileModel = OriginalFileName + Twine(".elfnotes.%%%%%%%.tmp");
|
||||
Expected<sys::fs::TempFile> NotesTemp =
|
||||
sys::fs::TempFile::create(NotesFileModel);
|
||||
if (Error E = NotesTemp.takeError()) {
|
||||
handleErrorAsWarning(createFileError(NotesFileModel, std::move(E)));
|
||||
return Buf;
|
||||
}
|
||||
TempFiles.push_back(NotesTemp->TmpName);
|
||||
|
||||
// Create temporary file for the updated ELF image.
|
||||
// This is an empty file that we pass to llvm-objcopy invocation
|
||||
// for writing.
|
||||
Twine ELFFileModel = OriginalFileName + Twine(".elfwithnotes.%%%%%%%.tmp");
|
||||
Expected<sys::fs::TempFile> ELFTemp =
|
||||
sys::fs::TempFile::create(ELFFileModel);
|
||||
if (Error E = ELFTemp.takeError()) {
|
||||
handleErrorAsWarning(createFileError(ELFFileModel, std::move(E)));
|
||||
return Buf;
|
||||
}
|
||||
TempFiles.push_back(ELFTemp->TmpName);
|
||||
|
||||
// Keep the new ELF image file to reserve the name for the future
|
||||
// llvm-objcopy invocation.
|
||||
std::string ELFTmpFileName = ELFTemp->TmpName;
|
||||
if (Error E = ELFTemp->keep(ELFTmpFileName)) {
|
||||
handleErrorAsWarning(createFileError(ELFTmpFileName, std::move(E)));
|
||||
return Buf;
|
||||
}
|
||||
|
||||
// Write notes to the *elfnotes*.tmp file.
|
||||
raw_fd_ostream NotesOS(NotesTemp->FD, false);
|
||||
|
||||
struct NoteTy {
|
||||
// Note name is a null-terminated "LLVMOMPOFFLOAD".
|
||||
std::string Name;
|
||||
// Note type defined in llvm/include/llvm/BinaryFormat/ELF.h.
|
||||
uint32_t Type = 0;
|
||||
// Each note has type-specific associated data.
|
||||
std::string Desc;
|
||||
|
||||
NoteTy(std::string &&Name, uint32_t Type, std::string &&Desc)
|
||||
: Name(std::move(Name)), Type(Type), Desc(std::move(Desc)) {}
|
||||
};
|
||||
|
||||
// So far we emit just three notes.
|
||||
SmallVector<NoteTy, 3> Notes;
|
||||
// Version of the offload image identifying the structure of the ELF image.
|
||||
// Version 1.0 does not have any specific requirements.
|
||||
// We may come up with some structure that has to be honored by all
|
||||
// offload implementations in future (e.g. to let libomptarget
|
||||
// get some information from the offload image).
|
||||
Notes.emplace_back("LLVMOMPOFFLOAD", ELF::NT_LLVM_OPENMP_OFFLOAD_VERSION,
|
||||
OPENMP_OFFLOAD_IMAGE_VERSION);
|
||||
// This is a producer identification string. We are LLVM!
|
||||
Notes.emplace_back("LLVMOMPOFFLOAD", ELF::NT_LLVM_OPENMP_OFFLOAD_PRODUCER,
|
||||
"LLVM");
|
||||
// This is a producer version. Use the same format that is used
|
||||
// by clang to report the LLVM version.
|
||||
Notes.emplace_back("LLVMOMPOFFLOAD",
|
||||
ELF::NT_LLVM_OPENMP_OFFLOAD_PRODUCER_VERSION,
|
||||
LLVM_VERSION_STRING
|
||||
#ifdef LLVM_REVISION
|
||||
" " LLVM_REVISION
|
||||
#endif
|
||||
);
|
||||
|
||||
// Return the amount of padding required for a blob of N bytes
|
||||
// to be aligned to Alignment bytes.
|
||||
auto getPadAmount = [](uint32_t N, uint32_t Alignment) -> uint32_t {
|
||||
uint32_t Mod = (N % Alignment);
|
||||
if (Mod == 0)
|
||||
return 0;
|
||||
return Alignment - Mod;
|
||||
};
|
||||
auto emitPadding = [&getPadAmount](raw_ostream &OS, uint32_t Size) {
|
||||
for (uint32_t I = 0; I < getPadAmount(Size, 4); ++I)
|
||||
OS << '\0';
|
||||
};
|
||||
|
||||
// Put notes into the file.
|
||||
for (auto &N : Notes) {
|
||||
assert(!N.Name.empty() && "We should not create notes with empty names.");
|
||||
// Name must be null-terminated.
|
||||
if (N.Name.back() != '\0')
|
||||
N.Name += '\0';
|
||||
uint32_t NameSz = N.Name.size();
|
||||
uint32_t DescSz = N.Desc.size();
|
||||
// A note starts with three 4-byte values:
|
||||
// NameSz
|
||||
// DescSz
|
||||
// Type
|
||||
// These three fields are endian-sensitive.
|
||||
support::endian::write<uint32_t>(NotesOS, NameSz, Endianness);
|
||||
support::endian::write<uint32_t>(NotesOS, DescSz, Endianness);
|
||||
support::endian::write<uint32_t>(NotesOS, N.Type, Endianness);
|
||||
// Next, we have a null-terminated Name padded to a 4-byte boundary.
|
||||
NotesOS << N.Name;
|
||||
emitPadding(NotesOS, NameSz);
|
||||
if (DescSz == 0)
|
||||
continue;
|
||||
// Finally, we have a descriptor, which is an arbitrary flow of bytes.
|
||||
NotesOS << N.Desc;
|
||||
emitPadding(NotesOS, DescSz);
|
||||
}
|
||||
NotesOS.flush();
|
||||
|
||||
// Keep the notes file.
|
||||
std::string NotesTmpFileName = NotesTemp->TmpName;
|
||||
if (Error E = NotesTemp->keep(NotesTmpFileName)) {
|
||||
handleErrorAsWarning(createFileError(NotesTmpFileName, std::move(E)));
|
||||
return Buf;
|
||||
}
|
||||
|
||||
// Run llvm-objcopy like this:
|
||||
// llvm-objcopy --add-section=.note.openmp=<notes-tmp-file-name> \
|
||||
// <orig-file-name> <elf-tmp-file-name>
|
||||
//
|
||||
// This will add a SHT_NOTE section on top of the original ELF.
|
||||
std::vector<StringRef> Args;
|
||||
Args.push_back(ObjcopyPath);
|
||||
std::string Option("--add-section=.note.openmp=" + NotesTmpFileName);
|
||||
Args.push_back(Option);
|
||||
Args.push_back(OriginalFileName);
|
||||
Args.push_back(ELFTmpFileName);
|
||||
bool ExecutionFailed = false;
|
||||
std::string ErrMsg;
|
||||
(void)sys::ExecuteAndWait(ObjcopyPath, Args,
|
||||
/*Env=*/llvm::None, /*Redirects=*/{},
|
||||
/*SecondsToWait=*/0,
|
||||
/*MemoryLimit=*/0, &ErrMsg, &ExecutionFailed);
|
||||
|
||||
if (ExecutionFailed) {
|
||||
warningOS() << ErrMsg << "\n";
|
||||
return Buf;
|
||||
}
|
||||
|
||||
// Substitute the original ELF with new one.
|
||||
ErrorOr<std::unique_ptr<MemoryBuffer>> BufOrErr =
|
||||
MemoryBuffer::getFile(ELFTmpFileName);
|
||||
if (!BufOrErr) {
|
||||
handleErrorAsWarning(
|
||||
createFileError(ELFTmpFileName, BufOrErr.getError()));
|
||||
return Buf;
|
||||
}
|
||||
|
||||
return std::move(*BufOrErr);
|
||||
}
|
||||
};
|
||||
|
||||
} // anonymous namespace
|
||||
|
||||
int main(int argc, const char **argv) {
|
||||
sys::PrintStackTraceOnErrorSignal(argv[0]);
|
||||
|
||||
cl::HideUnrelatedOptions(ClangOffloadWrapperCategory);
|
||||
cl::SetVersionPrinter([](raw_ostream &OS) {
|
||||
OS << clang::getClangToolFullVersion("clang-offload-wrapper") << '\n';
|
||||
});
|
||||
cl::ParseCommandLineOptions(
|
||||
argc, argv,
|
||||
"A tool to create a wrapper bitcode for offload target binaries. Takes "
|
||||
"offload\ntarget binaries as input and produces bitcode file containing "
|
||||
"target binaries packaged\nas data and initialization code which "
|
||||
"registers target binaries in offload runtime.\n");
|
||||
|
||||
if (Help) {
|
||||
cl::PrintHelpMessage();
|
||||
return 0;
|
||||
}
|
||||
|
||||
auto reportError = [argv](Error E) {
|
||||
logAllUnhandledErrors(std::move(E), WithColor::error(errs(), argv[0]));
|
||||
};
|
||||
|
||||
if (Triple(Target).getArch() == Triple::UnknownArch) {
|
||||
reportError(createStringError(
|
||||
errc::invalid_argument, "'" + Target + "': unsupported target triple"));
|
||||
return 1;
|
||||
}
|
||||
|
||||
BinaryWrapper Wrapper(Target, argv[0]);
|
||||
|
||||
// Read device binaries.
|
||||
SmallVector<std::unique_ptr<MemoryBuffer>, 4u> Buffers;
|
||||
SmallVector<ArrayRef<char>, 4u> Images;
|
||||
Buffers.reserve(Inputs.size());
|
||||
Images.reserve(Inputs.size());
|
||||
for (const std::string &File : Inputs) {
|
||||
ErrorOr<std::unique_ptr<MemoryBuffer>> BufOrErr =
|
||||
MemoryBuffer::getFileOrSTDIN(File);
|
||||
if (!BufOrErr) {
|
||||
reportError(createFileError(File, BufOrErr.getError()));
|
||||
return 1;
|
||||
}
|
||||
std::unique_ptr<MemoryBuffer> Buffer(std::move(*BufOrErr));
|
||||
if (File != "-" && AddOpenMPOffloadNotes) {
|
||||
// Adding ELF notes for STDIN is not supported yet.
|
||||
Buffer = Wrapper.addELFNotes(std::move(Buffer), File);
|
||||
}
|
||||
const std::unique_ptr<MemoryBuffer> &Buf =
|
||||
Buffers.emplace_back(std::move(Buffer));
|
||||
Images.emplace_back(Buf->getBufferStart(), Buf->getBufferSize());
|
||||
}
|
||||
|
||||
// Create the output file to write the resulting bitcode to.
|
||||
std::error_code EC;
|
||||
ToolOutputFile Out(Output, EC, sys::fs::OF_None);
|
||||
if (EC) {
|
||||
reportError(createFileError(Output, EC));
|
||||
return 1;
|
||||
}
|
||||
|
||||
// Create a wrapper for device binaries and write its bitcode to the file.
|
||||
WriteBitcodeToFile(
|
||||
Wrapper.wrapBinaries(makeArrayRef(Images.data(), Images.size())),
|
||||
Out.os());
|
||||
if (Out.os().has_error()) {
|
||||
reportError(createFileError(Output, Out.os().error()));
|
||||
return 1;
|
||||
}
|
||||
|
||||
// Success.
|
||||
Out.keep();
|
||||
return 0;
|
||||
}
|
|
@ -39,22 +39,16 @@ include_directories(${LIBOMPTARGET_LLVM_INCLUDE_DIRS})
|
|||
|
||||
# This is a list of all the targets that are supported/tested right now.
|
||||
set (LIBOMPTARGET_ALL_TARGETS "${LIBOMPTARGET_ALL_TARGETS} aarch64-unknown-linux-gnu")
|
||||
set (LIBOMPTARGET_ALL_TARGETS "${LIBOMPTARGET_ALL_TARGETS} aarch64-unknown-linux-gnu-oldDriver")
|
||||
set (LIBOMPTARGET_ALL_TARGETS "${LIBOMPTARGET_ALL_TARGETS} aarch64-unknown-linux-gnu-LTO")
|
||||
set (LIBOMPTARGET_ALL_TARGETS "${LIBOMPTARGET_ALL_TARGETS} amdgcn-amd-amdhsa")
|
||||
set (LIBOMPTARGET_ALL_TARGETS "${LIBOMPTARGET_ALL_TARGETS} amdgcn-amd-amdhsa-oldDriver")
|
||||
set (LIBOMPTARGET_ALL_TARGETS "${LIBOMPTARGET_ALL_TARGETS} amdgcn-amd-amdhsa-LTO")
|
||||
set (LIBOMPTARGET_ALL_TARGETS "${LIBOMPTARGET_ALL_TARGETS} powerpc64le-ibm-linux-gnu")
|
||||
set (LIBOMPTARGET_ALL_TARGETS "${LIBOMPTARGET_ALL_TARGETS} powerpc64le-ibm-linux-gnu-oldDriver")
|
||||
set (LIBOMPTARGET_ALL_TARGETS "${LIBOMPTARGET_ALL_TARGETS} powerpc64le-ibm-linux-gnu-LTO")
|
||||
set (LIBOMPTARGET_ALL_TARGETS "${LIBOMPTARGET_ALL_TARGETS} powerpc64-ibm-linux-gnu")
|
||||
set (LIBOMPTARGET_ALL_TARGETS "${LIBOMPTARGET_ALL_TARGETS} powerpc64-ibm-linux-gnu-oldDriver")
|
||||
set (LIBOMPTARGET_ALL_TARGETS "${LIBOMPTARGET_ALL_TARGETS} powerpc64-ibm-linux-gnu-LTO")
|
||||
set (LIBOMPTARGET_ALL_TARGETS "${LIBOMPTARGET_ALL_TARGETS} x86_64-pc-linux-gnu")
|
||||
set (LIBOMPTARGET_ALL_TARGETS "${LIBOMPTARGET_ALL_TARGETS} x86_64-pc-linux-gnu-oldDriver")
|
||||
set (LIBOMPTARGET_ALL_TARGETS "${LIBOMPTARGET_ALL_TARGETS} x86_64-pc-linux-gnu-LTO")
|
||||
set (LIBOMPTARGET_ALL_TARGETS "${LIBOMPTARGET_ALL_TARGETS} nvptx64-nvidia-cuda")
|
||||
set (LIBOMPTARGET_ALL_TARGETS "${LIBOMPTARGET_ALL_TARGETS} nvptx64-nvidia-cuda-oldDriver")
|
||||
set (LIBOMPTARGET_ALL_TARGETS "${LIBOMPTARGET_ALL_TARGETS} nvptx64-nvidia-cuda-LTO")
|
||||
|
||||
# Once the plugins for the different targets are validated, they will be added to
|
||||
|
|
|
@ -105,14 +105,10 @@ else: # Unices
|
|||
config.test_flags += " --libomptarget-amdgcn-bc-path=" + config.library_dir
|
||||
if config.libomptarget_current_target.startswith('nvptx'):
|
||||
config.test_flags += " --libomptarget-nvptx-bc-path=" + config.library_dir
|
||||
if config.libomptarget_current_target.endswith('-oldDriver'):
|
||||
config.test_flags += " -fno-openmp-new-driver"
|
||||
if config.libomptarget_current_target.endswith('-LTO'):
|
||||
config.test_flags += " -foffload-lto"
|
||||
|
||||
def remove_suffix_if_present(name):
|
||||
if name.endswith('-oldDriver'):
|
||||
return name[:-10]
|
||||
if name.endswith('-LTO'):
|
||||
return name[:-4]
|
||||
else:
|
||||
|
|
|
@ -2,7 +2,6 @@
|
|||
|
||||
// Wrong results on amdgpu
|
||||
// XFAIL: amdgcn-amd-amdhsa
|
||||
// XFAIL: amdgcn-amd-amdhsa-oldDriver
|
||||
// XFAIL: amdgcn-amd-amdhsa-LTO
|
||||
|
||||
#include <stdio.h>
|
||||
|
|
|
@ -2,7 +2,6 @@
|
|||
|
||||
// Wrong results on amdgpu
|
||||
// XFAIL: amdgcn-amd-amdhsa
|
||||
// XFAIL: amdgcn-amd-amdhsa-oldDriver
|
||||
// XFAIL: amdgcn-amd-amdhsa-LTO
|
||||
|
||||
#include <cstdio>
|
||||
|
|
|
@ -2,7 +2,6 @@
|
|||
|
||||
// Wrong results on amdgpu
|
||||
// XFAIL: amdgcn-amd-amdhsa
|
||||
// XFAIL: amdgcn-amd-amdhsa-oldDriver
|
||||
// XFAIL: amdgcn-amd-amdhsa-LTO
|
||||
|
||||
#include <cstdio>
|
||||
|
|
|
@ -2,7 +2,6 @@
|
|||
|
||||
// Wrong results on amdgpu
|
||||
// XFAIL: amdgcn-amd-amdhsa
|
||||
// XFAIL: amdgcn-amd-amdhsa-oldDriver
|
||||
// XFAIL: amdgcn-amd-amdhsa-LTO
|
||||
|
||||
#include <stdio.h>
|
||||
|
|
|
@ -2,7 +2,6 @@
|
|||
|
||||
// Error on the gpu that crashes the host
|
||||
// UNSUPPORTED: amdgcn-amd-amdhsa
|
||||
// UNSUPPORTED: amdgcn-amd-amdhsa-oldDriver
|
||||
// UNSUPPORTED: amdgcn-amd-amdhsa-LTO
|
||||
|
||||
#include <iostream>
|
||||
|
|
|
@ -3,22 +3,16 @@
|
|||
// Taken from https://github.com/llvm/llvm-project/issues/54216
|
||||
|
||||
// UNSUPPORTED: aarch64-unknown-linux-gnu
|
||||
// UNSUPPORTED: aarch64-unknown-linux-gnu-oldDriver
|
||||
// UNSUPPORTED: aarch64-unknown-linux-gnu-LTO
|
||||
// UNSUPPORTED: amdgcn-amd-amdhsa
|
||||
// UNSUPPORTED: amdgcn-amd-amdhsa-oldDriver
|
||||
// UNSUPPORTED: amdgcn-amd-amdhsa-LTO
|
||||
// UNSUPPORTED: powerpc64le-ibm-linux-gnu
|
||||
// UNSUPPORTED: powerpc64le-ibm-linux-gnu-oldDriver
|
||||
// UNSUPPORTED: powerpc64le-ibm-linux-gnu-LTO
|
||||
// UNSUPPORTED: powerpc64-ibm-linux-gnu
|
||||
// UNSUPPORTED: powerpc64-ibm-linux-gnu-oldDriver
|
||||
// UNSUPPORTED: powerpc64-ibm-linux-gnu-LTO
|
||||
// UNSUPPORTED: x86_64-pc-linux-gnu
|
||||
// UNSUPPORTED: x86_64-pc-linux-gnu-oldDriver
|
||||
// UNSUPPORTED: x86_64-pc-linux-gnu-LTO
|
||||
// UNSUPPORTED: nvptx64-nvidia-cuda
|
||||
// UNSUPPORTED: nvptx64-nvidia-cuda-oldDriver
|
||||
// UNSUPPORTED: nvptx64-nvidia-cuda-LTO
|
||||
|
||||
#include <algorithm>
|
||||
|
|
|
@ -3,7 +3,6 @@
|
|||
|
||||
// Wrong results on amdgpu
|
||||
// XFAIL: amdgcn-amd-amdhsa
|
||||
// XFAIL: amdgcn-amd-amdhsa-oldDriver
|
||||
// XFAIL: amdgcn-amd-amdhsa-LTO
|
||||
|
||||
#include <omp.h>
|
||||
|
|
|
@ -2,7 +2,6 @@
|
|||
|
||||
// Hangs
|
||||
// UNSUPPORTED: amdgcn-amd-amdhsa
|
||||
// UNSUPPORTED: amdgcn-amd-amdhsa-oldDriver
|
||||
// UNSUPPORTED: amdgcn-amd-amdhsa-LTO
|
||||
|
||||
#include <iostream>
|
||||
|
|
|
@ -2,10 +2,8 @@
|
|||
|
||||
// Currently hangs on amdgpu
|
||||
// UNSUPPORTED: amdgcn-amd-amdhsa
|
||||
// UNSUPPORTED: amdgcn-amd-amdhsa-oldDriver
|
||||
// UNSUPPORTED: amdgcn-amd-amdhsa-LTO
|
||||
// UNSUPPORTED: x86_64-pc-linux-gnu
|
||||
// UNSUPPORTED: x86_64-pc-linux-gnu-oldDriver
|
||||
// UNSUPPORTED: x86_64-pc-linux-gnu-LTO
|
||||
|
||||
#include <cassert>
|
||||
|
|
|
@ -2,7 +2,6 @@
|
|||
// RUN: env LIBOMPTARGET_STACK_SIZE=2048 %libomptarget-run-generic
|
||||
|
||||
// UNSUPPORTED: amdgcn-amd-amdhsa
|
||||
// UNSUPPORTED: amdgcn-amd-amdhsa-oldDriver
|
||||
// UNSUPPORTED: amdgcn-amd-amdhsa-LTO
|
||||
|
||||
#include <cassert>
|
||||
|
|
|
@ -34,7 +34,6 @@
|
|||
|
||||
// Hangs
|
||||
// UNSUPPORTED: amdgcn-amd-amdhsa
|
||||
// UNSUPPORTED: amdgcn-amd-amdhsa-oldDriver
|
||||
// UNSUPPORTED: amdgcn-amd-amdhsa-LTO
|
||||
|
||||
#if ADD_REDUCTION
|
||||
|
|
|
@ -9,7 +9,6 @@
|
|||
|
||||
// amdgpu does not have a working printf definition
|
||||
// XFAIL: amdgcn-amd-amdhsa
|
||||
// XFAIL: amdgcn-amd-amdhsa-oldDriver
|
||||
// XFAIL: amdgcn-amd-amdhsa-LTO
|
||||
|
||||
#include <stdio.h>
|
||||
|
|
|
@ -1,7 +1,6 @@
|
|||
// RUN: %libomptarget-compilexx-run-and-check-generic
|
||||
|
||||
// UNSUPPORTED: x86_64-pc-linux-gnu
|
||||
// UNSUPPORTED: x86_64-pc-linux-gnu-oldDriver
|
||||
// UNSUPPORTED: x86_64-pc-linux-gnu-LTO
|
||||
|
||||
#include <omp.h>
|
||||
|
|
|
@ -1,7 +1,6 @@
|
|||
// RUN: %libomptarget-compilexx-run-and-check-generic
|
||||
|
||||
// UNSUPPORTED: x86_64-pc-linux-gnu
|
||||
// UNSUPPORTED: x86_64-pc-linux-gnu-oldDriver
|
||||
// UNSUPPORTED: x86_64-pc-linux-gnu-LTO
|
||||
|
||||
#include <cassert>
|
||||
|
|
|
@ -2,9 +2,6 @@
|
|||
// RUN: ar rcs %t.a %t.o
|
||||
// RUN: %libomptarget-compile-generic %t.a && %libomptarget-run-generic 2>&1 | %fcheck-generic
|
||||
|
||||
// UNSUPPORTED: nvptx64-nvidia-cuda-oldDriver
|
||||
// UNSUPPORTED: amdgcn-amd-amdhsa-oldDriver
|
||||
|
||||
#ifdef LIBRARY
|
||||
int x = 42;
|
||||
#pragma omp declare target(x)
|
||||
|
|
|
@ -1,7 +1,6 @@
|
|||
// RUN: %libomptarget-compilexx-and-run-generic
|
||||
|
||||
// UNSUPPORTED: x86_64-pc-linux-gnu
|
||||
// UNSUPPORTED: x86_64-pc-linux-gnu-oldDriver
|
||||
// UNSUPPORTED: x86_64-pc-linux-gnu-LTO
|
||||
|
||||
#include <cmath>
|
||||
|
|
|
@ -1,11 +1,9 @@
|
|||
// RUN: %libomptarget-compile-run-and-check-generic
|
||||
// XFAIL: nvptx64-nvidia-cuda
|
||||
// XFAIL: nvptx64-nvidia-cuda-oldDriver
|
||||
// XFAIL: nvptx64-nvidia-cuda-LTO
|
||||
|
||||
// Fails on amdgpu with error: GPU Memory Error
|
||||
// XFAIL: amdgcn-amd-amdhsa
|
||||
// XFAIL: amdgcn-amd-amdhsa-oldDriver
|
||||
// XFAIL: amdgcn-amd-amdhsa-LTO
|
||||
|
||||
#include <stdio.h>
|
||||
|
|
Loading…
Reference in New Issue