[CUDA][OpenMP] Create generic offload toolchains
Summary:
This patch introduces the concepts of an offloading tool chain and an offloading kind. Each tool chain may have an associated offloading kind that marks it as used by a given programming model that requires offloading. The patch also adds the logic to iterate over the tool chains based on that kind. Currently only CUDA is supported, but in general a programming model (an offloading kind) may have several associated tool chains that require offloading support.

This patch does not add tests; its goal is to preserve the existing functionality.

This patch is the first in a series of three that attempts to make the current CUDA support more generic and easier to extend to other programming models, namely OpenMP. It tries to capture the suggestions/improvements/concerns raised on the initial proposal in http://lists.llvm.org/pipermail/cfe-dev/2016-February/047547.html. It only tackles the more consensual part of the proposal, i.e. it does not yet address the problem of bundling intermediate files.

Reviewers: ABataev, jlebar, echristo, hfinkel, tra

Subscribers: guansong, Hahnfeld, andreybokhanko, tcramer, mkuron, cfe-commits, arpith-jacob, carlo.bertolli, caomhin

Differential Revision: http://reviews.llvm.org/D18170

llvm-svn: 272571
parent 8cb45c838f
commit c1ffba5062
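The offload-kind bitmask introduced below is the core of the patch: each programming model reserves one bit, and the host's ActiveOffloadMask accumulates the kind of every device tool chain that gets registered. The following is a minimal standalone sketch of that idea, not the clang driver code itself; the enumerator values mirror the ones added to Action.h, while main() and the local variable names are invented for illustration.

#include <cassert>

// Mirrors the enum added to Action.h: one bit per offloading kind.
enum OffloadKind : unsigned {
  OFK_None = 0x00,
  OFK_Host = 0x01, // the host offloading tool chain
  OFK_Cuda = 0x02, // device offloading, one bit per programming model
};

int main() {
  unsigned ActiveOffloadMask = 0u; // starts empty, as in the Compilation constructor

  // Registering a CUDA device tool chain folds its kind into the host mask.
  OffloadKind DeviceKind = OFK_Cuda;
  assert(DeviceKind != OFK_Host && DeviceKind != OFK_None);
  ActiveOffloadMask |= DeviceKind;

  // The host can now be queried per programming model, the same test
  // Compilation::isOffloadingHostKind performs.
  assert(ActiveOffloadMask & OFK_Cuda);
  assert(!(ActiveOffloadMask & 0x04)); // no other model registered
  return 0;
}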
@@ -68,6 +68,21 @@ public:
     JobClassLast=VerifyPCHJobClass
   };
 
+  // The offloading kind determines if this action is binded to a particular
+  // programming model. Each entry reserves one bit. We also have a special kind
+  // to designate the host offloading tool chain.
+  //
+  // FIXME: This is currently used to indicate that tool chains are used in a
+  // given programming, but will be used here as well once a generic offloading
+  // action is implemented.
+  enum OffloadKind {
+    OFK_None = 0x00,
+    // The host offloading tool chain.
+    OFK_Host = 0x01,
+    // The device offloading tool chains - one bit for each programming model.
+    OFK_Cuda = 0x02,
+  };
+
   static const char *getClassName(ActionClass AC);
 
 private:
@@ -15,6 +15,7 @@
 #include "clang/Driver/Util.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/Support/Path.h"
+#include <map>
 
 namespace llvm {
 namespace opt {
@@ -38,8 +39,16 @@ class Compilation {
   /// The default tool chain.
   const ToolChain &DefaultToolChain;
 
-  const ToolChain *CudaHostToolChain;
-  const ToolChain *CudaDeviceToolChain;
+  /// A mask of all the programming models the host has to support in the
+  /// current compilation.
+  unsigned ActiveOffloadMask;
+
+  /// Array with the toolchains of offloading host and devices in the order they
+  /// were requested by the user. We are preserving that order in case the code
+  /// generation needs to derive a programming-model-specific semantic out of
+  /// it.
+  std::multimap<Action::OffloadKind, const ToolChain *>
+      OrderedOffloadingToolchains;
 
   /// The original (untranslated) input argument list.
   llvm::opt::InputArgList *Args;
@@ -89,16 +98,51 @@ public:
   const Driver &getDriver() const { return TheDriver; }
 
   const ToolChain &getDefaultToolChain() const { return DefaultToolChain; }
-  const ToolChain *getCudaHostToolChain() const { return CudaHostToolChain; }
-  const ToolChain *getCudaDeviceToolChain() const {
-    return CudaDeviceToolChain;
+  const ToolChain *getOffloadingHostToolChain() const {
+    auto It = OrderedOffloadingToolchains.find(Action::OFK_Host);
+    if (It != OrderedOffloadingToolchains.end())
+      return It->second;
+    return nullptr;
   }
+  unsigned isOffloadingHostKind(Action::OffloadKind Kind) const {
+    return ActiveOffloadMask & Kind;
+  }
 
-  void setCudaHostToolChain(const ToolChain *HostToolChain) {
-    CudaHostToolChain = HostToolChain;
+  /// Iterator that visits device toolchains of a given kind.
+  typedef const std::multimap<Action::OffloadKind,
+                              const ToolChain *>::const_iterator
+      const_offload_toolchains_iterator;
+  typedef std::pair<const_offload_toolchains_iterator,
+                    const_offload_toolchains_iterator>
+      const_offload_toolchains_range;
+
+  template <Action::OffloadKind Kind>
+  const_offload_toolchains_range getOffloadToolChains() const {
+    return OrderedOffloadingToolchains.equal_range(Kind);
   }
-  void setCudaDeviceToolChain(const ToolChain *DeviceToolChain) {
-    CudaDeviceToolChain = DeviceToolChain;
+
+  // Return an offload toolchain of the provided kind. Only one is expected to
+  // exist.
+  template <Action::OffloadKind Kind>
+  const ToolChain *getSingleOffloadToolChain() const {
+    auto TCs = getOffloadToolChains<Kind>();
+
+    assert(TCs.first != TCs.second &&
+           "No tool chains of the selected kind exist!");
+    assert(std::next(TCs.first) == TCs.second &&
+           "More than one tool chain of the this kind exist.");
+    return TCs.first->second;
   }
 
+  void addOffloadDeviceToolChain(const ToolChain *DeviceToolChain,
+                                 Action::OffloadKind OffloadKind) {
+    assert(OffloadKind != Action::OFK_Host && OffloadKind != Action::OFK_None &&
+           "This is not a device tool chain!");
+
+    // Update the host offload kind to also contain this kind.
+    ActiveOffloadMask |= OffloadKind;
+    OrderedOffloadingToolchains.insert(
+        std::make_pair(OffloadKind, DeviceToolChain));
+  }
+
   const llvm::opt::InputArgList &getInputArgs() const { return *Args; }
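The OrderedOffloadingToolchains registry above relies on std::multimap keeping elements with equal keys in insertion order (guaranteed since C++11), which is how equal_range can hand back the device tool chains in the order the user requested them. Below is a minimal standalone sketch of that container pattern under a stand-in ToolChain struct; it illustrates the behavior the class depends on and is not clang's actual API.

#include <cassert>
#include <iterator>
#include <map>
#include <string>

enum OffloadKind : unsigned { OFK_None = 0x00, OFK_Host = 0x01, OFK_Cuda = 0x02 };

struct ToolChain { std::string Triple; }; // stand-in for clang::driver::ToolChain

int main() {
  ToolChain Host{"x86_64-unknown-linux-gnu"};
  ToolChain Dev0{"nvptx64-nvidia-cuda"};
  ToolChain Dev1{"nvptx-nvidia-cuda"};

  // Host first, then devices in the order they were requested.
  std::multimap<OffloadKind, const ToolChain *> Ordered;
  Ordered.insert(std::make_pair(OFK_Host, &Host));
  Ordered.insert(std::make_pair(OFK_Cuda, &Dev0));
  Ordered.insert(std::make_pair(OFK_Cuda, &Dev1));

  // equal_range visits the CUDA tool chains in insertion order.
  auto Range = Ordered.equal_range(OFK_Cuda);
  assert(Range.first->second == &Dev0);
  assert(std::next(Range.first)->second == &Dev1);

  // A "single tool chain" accessor would assert that exactly one entry of the
  // requested kind exists, as Compilation::getSingleOffloadToolChain does.
  assert(Ordered.count(OFK_Host) == 1);
  return 0;
}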
@@ -275,6 +275,11 @@ public:
   /// @name Primary Functionality
   /// @{
 
+  /// CreateOffloadingDeviceToolChains - create all the toolchains required to
+  /// support offloading devices given the programming models specified in the
+  /// current compilation. Also, update the host tool chain kind accordingly.
+  void CreateOffloadingDeviceToolChains(Compilation &C, InputList &Inputs);
+
   /// BuildCompilation - Construct a compilation object for a command
   /// line argument vector.
   ///
@@ -24,10 +24,13 @@ using namespace llvm::opt;
 
 Compilation::Compilation(const Driver &D, const ToolChain &_DefaultToolChain,
                          InputArgList *_Args, DerivedArgList *_TranslatedArgs)
-    : TheDriver(D), DefaultToolChain(_DefaultToolChain),
-      CudaHostToolChain(&DefaultToolChain), CudaDeviceToolChain(nullptr),
+    : TheDriver(D), DefaultToolChain(_DefaultToolChain), ActiveOffloadMask(0u),
       Args(_Args), TranslatedArgs(_TranslatedArgs), Redirects(nullptr),
-      ForDiagnostics(false) {}
+      ForDiagnostics(false) {
+  // The offloading host toolchain is the default tool chain.
+  OrderedOffloadingToolchains.insert(
+      std::make_pair(Action::OFK_Host, &DefaultToolChain));
+}
 
 Compilation::~Compilation() {
   delete TranslatedArgs;
@@ -422,6 +422,31 @@ void Driver::setLTOMode(const llvm::opt::ArgList &Args) {
   }
 }
 
+void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
+                                              InputList &Inputs) {
+
+  //
+  // CUDA
+  //
+  // We need to generate a CUDA toolchain if any of the inputs has a CUDA type.
+  if (llvm::any_of(Inputs, [](std::pair<types::ID, const llvm::opt::Arg *> &I) {
+        return types::isCuda(I.first);
+      })) {
+    const ToolChain &TC = getToolChain(
+        C.getInputArgs(),
+        llvm::Triple(C.getOffloadingHostToolChain()->getTriple().isArch64Bit()
+                         ? "nvptx64-nvidia-cuda"
+                         : "nvptx-nvidia-cuda"));
+    C.addOffloadDeviceToolChain(&TC, Action::OFK_Cuda);
+  }
+
+  //
+  // TODO: Add support for other offloading programming models here.
+  //
+
+  return;
+}
+
 Compilation *Driver::BuildCompilation(ArrayRef<const char *> ArgList) {
   llvm::PrettyStackTraceString CrashInfo("Compilation construction");
 
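CreateOffloadingDeviceToolChains only builds an NVPTX tool chain when some input is actually a CUDA source, and it derives the 32- vs 64-bit device triple from the host triple. A standalone sketch of that selection logic follows; InputID, needsCudaToolChain, and cudaDeviceTriple are invented stand-ins for clang's types::ID values and Triple query, used here only to illustrate the check.

#include <algorithm>
#include <cassert>
#include <string>
#include <vector>

enum InputID { ID_C, ID_CXX, ID_CUDA }; // stand-in for clang's types::ID

// Mimics the any_of scan in CreateOffloadingDeviceToolChains: only build a
// CUDA device tool chain when some input is a CUDA source.
static bool needsCudaToolChain(const std::vector<InputID> &Inputs) {
  return std::any_of(Inputs.begin(), Inputs.end(),
                     [](InputID ID) { return ID == ID_CUDA; });
}

// The device triple follows the host's pointer width.
static std::string cudaDeviceTriple(bool HostIs64Bit) {
  return HostIs64Bit ? "nvptx64-nvidia-cuda" : "nvptx-nvidia-cuda";
}

int main() {
  std::vector<InputID> Inputs = {ID_CXX, ID_CUDA};
  assert(needsCudaToolChain(Inputs));
  assert(!needsCudaToolChain({ID_C, ID_CXX}));
  assert(cudaDeviceTriple(/*HostIs64Bit=*/true) == "nvptx64-nvidia-cuda");
  return 0;
}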
@@ -549,18 +574,8 @@ Compilation *Driver::BuildCompilation(ArrayRef<const char *> ArgList) {
   InputList Inputs;
   BuildInputs(C->getDefaultToolChain(), *TranslatedArgs, Inputs);
 
-  // Initialize the CUDA device TC only if we have any CUDA Inputs. This is
-  // necessary so that we don't break compilations that pass flags that are
-  // incompatible with the NVPTX TC (e.g. -mthread-model single).
-  if (llvm::any_of(Inputs, [](const std::pair<types::ID, const Arg *> &I) {
-        return I.first == types::TY_CUDA || I.first == types::TY_PP_CUDA ||
-               I.first == types::TY_CUDA_DEVICE;
-      })) {
-    C->setCudaDeviceToolChain(
-        &getToolChain(C->getArgs(), llvm::Triple(TC.getTriple().isArch64Bit()
-                                                     ? "nvptx64-nvidia-cuda"
-                                                     : "nvptx-nvidia-cuda")));
-  }
+  // Populate the tool chains for the offloading devices, if any.
+  CreateOffloadingDeviceToolChains(*C, Inputs);
 
   // Construct the list of abstract actions to perform for this compilation. On
   // MachO targets this uses the driver-driver and universal actions.
@@ -1390,7 +1405,7 @@ static Action *buildCudaActions(Compilation &C, DerivedArgList &Args,
   CudaDeviceInputs.push_back(std::make_pair(types::TY_CUDA_DEVICE, InputArg));
 
   // Build actions for all device inputs.
-  assert(C.getCudaDeviceToolChain() &&
+  assert(C.getSingleOffloadToolChain<Action::OFK_Cuda>() &&
          "Missing toolchain for device-side compilation.");
   ActionList CudaDeviceActions;
   C.getDriver().BuildActions(C, Args, CudaDeviceInputs, CudaDeviceActions);
@@ -2031,7 +2046,7 @@ InputInfo Driver::BuildJobsForActionNoCache(
     // Initial processing of CudaDeviceAction carries host params.
     // Call BuildJobsForAction() again, now with correct device parameters.
     InputInfo II = BuildJobsForAction(
-        C, *CDA->input_begin(), C.getCudaDeviceToolChain(),
+        C, *CDA->input_begin(), C.getSingleOffloadToolChain<Action::OFK_Cuda>(),
        CDA->getGpuArchName(), CDA->isAtTopLevel(), /*MultipleArchs=*/true,
        LinkingOutput, CachedResults);
     // Currently II's Action is *CDA->input_begin(). Set it to CDA instead, so
@@ -3767,10 +3767,10 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
     // particular compilation pass we're constructing here. For now we
     // can check which toolchain we're using and pick the other one to
     // extract the triple.
-    if (&getToolChain() == C.getCudaDeviceToolChain())
-      AuxToolChain = C.getCudaHostToolChain();
-    else if (&getToolChain() == C.getCudaHostToolChain())
-      AuxToolChain = C.getCudaDeviceToolChain();
+    if (&getToolChain() == C.getSingleOffloadToolChain<Action::OFK_Cuda>())
+      AuxToolChain = C.getOffloadingHostToolChain();
+    else if (&getToolChain() == C.getOffloadingHostToolChain())
+      AuxToolChain = C.getSingleOffloadToolChain<Action::OFK_Cuda>();
     else
       llvm_unreachable("Can't figure out CUDA compilation mode.");
     assert(AuxToolChain != nullptr && "No aux toolchain.");
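For CUDA compiles, each clang invocation also needs the opposite side's triple as the aux target, so the job construction above checks which of the two registered tool chains it is running for and picks the other one. A standalone sketch of that selection follows; the ToolChain struct and pickAuxToolChain helper are invented for the example and only mirror the if/else logic shown in the hunk.

#include <cassert>
#include <stdexcept>
#include <string>

struct ToolChain { std::string Triple; }; // stand-in for clang's ToolChain

// If we are compiling for the device, the aux tool chain is the host, and
// vice versa; anything else means we are not in a CUDA compilation.
static const ToolChain *pickAuxToolChain(const ToolChain *Current,
                                         const ToolChain *Host,
                                         const ToolChain *CudaDevice) {
  if (Current == CudaDevice)
    return Host;
  if (Current == Host)
    return CudaDevice;
  throw std::logic_error("Can't figure out CUDA compilation mode.");
}

int main() {
  ToolChain Host{"x86_64-unknown-linux-gnu"}, Device{"nvptx64-nvidia-cuda"};
  assert(pickAuxToolChain(&Device, &Host, &Device) == &Host);
  assert(pickAuxToolChain(&Host, &Host, &Device) == &Device);
  return 0;
}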