2011-01-01 01:31:54 +08:00
|
|
|
//===--- Action.cpp - Abstract compilation steps --------------------------===//
|
2009-03-13 02:40:18 +08:00
|
|
|
//
|
|
|
|
// The LLVM Compiler Infrastructure
|
|
|
|
//
|
|
|
|
// This file is distributed under the University of Illinois Open Source
|
|
|
|
// License. See LICENSE.TXT for details.
|
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
#include "clang/Driver/Action.h"
|
[CUDA][OpenMP] Create generic offload action
Summary:
This patch replaces the CUDA specific action by a generic offload action. The offload action may have multiple dependences classified in "host" and "device". The way this generic offloading action is used is very similar to what is done today by the CUDA implementation: it is used to set a specific toolchain and architecture to its dependences during the generation of jobs.
This patch also proposes propagating the offloading information through the action graph so that the information can be easily retrieved at any time during the generation of commands. This allows e.g. the "clang tool" to evaluate whether CUDA should be supported for the device or host and ptxas to easily retrieve the target architecture.
This is an example of how the action graphs would look like (compilation of a single CUDA file with two GPU architectures)
```
0: input, "cudatests.cu", cuda, (host-cuda)
1: preprocessor, {0}, cuda-cpp-output, (host-cuda)
2: compiler, {1}, ir, (host-cuda)
3: input, "cudatests.cu", cuda, (device-cuda, sm_35)
4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_35)
5: compiler, {4}, ir, (device-cuda, sm_35)
6: backend, {5}, assembler, (device-cuda, sm_35)
7: assembler, {6}, object, (device-cuda, sm_35)
8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {7}, object
9: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {6}, assembler
10: input, "cudatests.cu", cuda, (device-cuda, sm_37)
11: preprocessor, {10}, cuda-cpp-output, (device-cuda, sm_37)
12: compiler, {11}, ir, (device-cuda, sm_37)
13: backend, {12}, assembler, (device-cuda, sm_37)
14: assembler, {13}, object, (device-cuda, sm_37)
15: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {14}, object
16: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {13}, assembler
17: linker, {8, 9, 15, 16}, cuda-fatbin, (device-cuda)
18: offload, "host-cuda (powerpc64le-unknown-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda)" {17}, ir
19: backend, {18}, assembler
20: assembler, {19}, object
21: input, "cuda", object
22: input, "cudart", object
23: linker, {20, 21, 22}, image
```
The changes in this patch pass the existent regression tests (keeps the existent functionality) and resulting binaries execute correctly in a Power8+K40 machine.
Reviewers: echristo, hfinkel, jlebar, ABataev, tra
Subscribers: guansong, andreybokhanko, tcramer, mkuron, cfe-commits, arpith-jacob, carlo.bertolli, caomhin
Differential Revision: https://reviews.llvm.org/D18171
llvm-svn: 275645
2016-07-16 07:13:27 +08:00
|
|
|
#include "clang/Driver/ToolChain.h"
|
[Driver][OpenMP] Add support to create jobs for bundling actions.
Summary: This patch adds the support to create a job for the `OffloadBundlingAction` which will invoke the `clang-offload-bundler` tool.
Reviewers: echristo, tra, jlebar, ABataev, hfinkel
Subscribers: whchung, mehdi_amini, cfe-commits, Hahnfeld, andreybokhanko, arpith-jacob, carlo.bertolli, caomhin
Differential Revision: https://reviews.llvm.org/D21856
llvm-svn: 285325
2016-10-28 02:04:42 +08:00
|
|
|
#include "llvm/ADT/StringRef.h"
|
2016-01-13 06:23:04 +08:00
|
|
|
#include "llvm/ADT/StringSwitch.h"
|
2011-09-23 13:57:42 +08:00
|
|
|
#include "llvm/Support/ErrorHandling.h"
|
2016-01-12 07:27:13 +08:00
|
|
|
#include "llvm/Support/Regex.h"
|
2009-03-13 02:40:18 +08:00
|
|
|
#include <cassert>
|
|
|
|
using namespace clang::driver;
|
2013-06-15 01:17:23 +08:00
|
|
|
using namespace llvm::opt;
|
2009-03-13 02:40:18 +08:00
|
|
|
|
2016-01-12 07:07:27 +08:00
|
|
|
// Defaulted out-of-line: the destructor is declared in Action.h (keeping the
// vtable anchored in this TU) but needs no user-provided body.
Action::~Action() = default;
|
2009-03-13 20:17:08 +08:00
|
|
|
|
|
|
|
const char *Action::getClassName(ActionClass AC) {
|
|
|
|
switch (AC) {
|
|
|
|
case InputClass: return "input";
|
|
|
|
case BindArchClass: return "bind-arch";
|
[CUDA][OpenMP] Create generic offload action
Summary:
This patch replaces the CUDA specific action by a generic offload action. The offload action may have multiple dependences classier in “host” and “device”. The way this generic offloading action is used is very similar to what is done today by the CUDA implementation: it is used to set a specific toolchain and architecture to its dependences during the generation of jobs.
This patch also proposes propagating the offloading information through the action graph so that that information can be easily retrieved at any time during the generation of commands. This allows e.g. the "clang tool” to evaluate whether CUDA should be supported for the device or host and ptas to easily retrieve the target architecture.
This is an example of how the action graphs would look like (compilation of a single CUDA file with two GPU architectures)
```
0: input, "cudatests.cu", cuda, (host-cuda)
1: preprocessor, {0}, cuda-cpp-output, (host-cuda)
2: compiler, {1}, ir, (host-cuda)
3: input, "cudatests.cu", cuda, (device-cuda, sm_35)
4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_35)
5: compiler, {4}, ir, (device-cuda, sm_35)
6: backend, {5}, assembler, (device-cuda, sm_35)
7: assembler, {6}, object, (device-cuda, sm_35)
8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {7}, object
9: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {6}, assembler
10: input, "cudatests.cu", cuda, (device-cuda, sm_37)
11: preprocessor, {10}, cuda-cpp-output, (device-cuda, sm_37)
12: compiler, {11}, ir, (device-cuda, sm_37)
13: backend, {12}, assembler, (device-cuda, sm_37)
14: assembler, {13}, object, (device-cuda, sm_37)
15: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {14}, object
16: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {13}, assembler
17: linker, {8, 9, 15, 16}, cuda-fatbin, (device-cuda)
18: offload, "host-cuda (powerpc64le-unknown-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda)" {17}, ir
19: backend, {18}, assembler
20: assembler, {19}, object
21: input, "cuda", object
22: input, "cudart", object
23: linker, {20, 21, 22}, image
```
The changes in this patch pass the existent regression tests (keeps the existent functionality) and resulting binaries execute correctly in a Power8+K40 machine.
Reviewers: echristo, hfinkel, jlebar, ABataev, tra
Subscribers: guansong, andreybokhanko, tcramer, mkuron, cfe-commits, arpith-jacob, carlo.bertolli, caomhin
Differential Revision: https://reviews.llvm.org/D18171
llvm-svn: 275645
2016-07-16 07:13:27 +08:00
|
|
|
case OffloadClass:
|
|
|
|
return "offload";
|
2009-03-14 01:52:07 +08:00
|
|
|
case PreprocessJobClass: return "preprocessor";
|
|
|
|
case PrecompileJobClass: return "precompiler";
|
|
|
|
case AnalyzeJobClass: return "analyzer";
|
2012-03-07 04:06:33 +08:00
|
|
|
case MigrateJobClass: return "migrator";
|
2009-03-14 01:52:07 +08:00
|
|
|
case CompileJobClass: return "compiler";
|
Reapply "Change -save-temps to emit unoptimized bitcode files."
This reapplies r224503 along with a fix for compiling Fortran by having the
clang driver invoke gcc (see r224546, where it was reverted). I have added
a testcase for that as well.
Original commit message:
It is often convenient to use -save-temps to collect the intermediate
results of a compilation, e.g., when triaging a bug report. Besides the
temporary files for preprocessed source and assembly code, this adds the
unoptimized bitcode files as well.
This adds a new BackendJobAction, which is mostly mechanical, to run after
the CompileJobAction. When not using -save-temps, the BackendJobAction is
combined into one job with the CompileJobAction, similar to the way the
integrated assembler is handled. I've implemented this entirely as a
driver change, so under the hood, it is just using -disable-llvm-optzns
to get the unoptimized bitcode.
Based in part on a patch by Steven Wu.
rdar://problem/18909437
llvm-svn: 224688
2014-12-21 15:00:00 +08:00
|
|
|
case BackendJobClass: return "backend";
|
2009-03-14 01:52:07 +08:00
|
|
|
case AssembleJobClass: return "assembler";
|
|
|
|
case LinkJobClass: return "linker";
|
2009-03-13 20:17:08 +08:00
|
|
|
case LipoJobClass: return "lipo";
|
2010-06-05 02:28:36 +08:00
|
|
|
case DsymutilJobClass: return "dsymutil";
|
2014-02-07 02:53:25 +08:00
|
|
|
case VerifyDebugInfoJobClass: return "verify-debug-info";
|
|
|
|
case VerifyPCHJobClass: return "verify-pch";
|
[Driver][OpenMP] Update actions builder to create bundling action when necessary.
Summary:
In order to save the user from dealing with multiple output files (for host and device) while using separate compilation, a new action `OffloadBundlingAction` is used when the last phase is not linking. This action will then result in a job that uses the proposed bundling tool to create a single preprocessed/IR/ASM/Object file from multiple ones.
The job creation for the new action will be proposed in a separate patch.
Reviewers: echristo, tra, jlebar, ABataev, hfinkel
Subscribers: whchung, mehdi_amini, cfe-commits, Hahnfeld, andreybokhanko, arpith-jacob, carlo.bertolli, caomhin
Differential Revision: https://reviews.llvm.org/D21852
llvm-svn: 285323
2016-10-28 01:50:43 +08:00
|
|
|
case OffloadBundlingJobClass:
|
|
|
|
return "clang-offload-bundler";
|
[Driver][OpenMP] Update actions builder to create unbundling action when necessary.
Summary:
Each time that offloading support is requested by the user and the input file is not a source file, an action `OffloadUnbundlingAction` is created to signal that the input file may contain bundles, so that the proper tool is then invoked to attempt to extract the components of the bundle. This patch adds the logic to create that action in offload action builder.
The job creation for the new action will be proposed in a separate patch.
Reviewers: echristo, tra, jlebar, ABataev, hfinkel
Subscribers: whchung, mehdi_amini, cfe-commits, Hahnfeld, andreybokhanko, arpith-jacob, carlo.bertolli, caomhin
Differential Revision: https://reviews.llvm.org/D21853
llvm-svn: 285324
2016-10-28 02:00:51 +08:00
|
|
|
case OffloadUnbundlingJobClass:
|
|
|
|
return "clang-offload-unbundler";
|
2009-03-13 20:17:08 +08:00
|
|
|
}
|
2009-09-09 23:08:12 +08:00
|
|
|
|
2011-09-23 13:06:16 +08:00
|
|
|
llvm_unreachable("invalid class");
|
2009-03-13 20:17:08 +08:00
|
|
|
}
|
2009-03-14 07:08:03 +08:00
|
|
|
|
[CUDA][OpenMP] Create generic offload action
Summary:
This patch replaces the CUDA specific action by a generic offload action. The offload action may have multiple dependences classier in “host” and “device”. The way this generic offloading action is used is very similar to what is done today by the CUDA implementation: it is used to set a specific toolchain and architecture to its dependences during the generation of jobs.
This patch also proposes propagating the offloading information through the action graph so that that information can be easily retrieved at any time during the generation of commands. This allows e.g. the "clang tool” to evaluate whether CUDA should be supported for the device or host and ptas to easily retrieve the target architecture.
This is an example of how the action graphs would look like (compilation of a single CUDA file with two GPU architectures)
```
0: input, "cudatests.cu", cuda, (host-cuda)
1: preprocessor, {0}, cuda-cpp-output, (host-cuda)
2: compiler, {1}, ir, (host-cuda)
3: input, "cudatests.cu", cuda, (device-cuda, sm_35)
4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_35)
5: compiler, {4}, ir, (device-cuda, sm_35)
6: backend, {5}, assembler, (device-cuda, sm_35)
7: assembler, {6}, object, (device-cuda, sm_35)
8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {7}, object
9: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {6}, assembler
10: input, "cudatests.cu", cuda, (device-cuda, sm_37)
11: preprocessor, {10}, cuda-cpp-output, (device-cuda, sm_37)
12: compiler, {11}, ir, (device-cuda, sm_37)
13: backend, {12}, assembler, (device-cuda, sm_37)
14: assembler, {13}, object, (device-cuda, sm_37)
15: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {14}, object
16: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {13}, assembler
17: linker, {8, 9, 15, 16}, cuda-fatbin, (device-cuda)
18: offload, "host-cuda (powerpc64le-unknown-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda)" {17}, ir
19: backend, {18}, assembler
20: assembler, {19}, object
21: input, "cuda", object
22: input, "cudart", object
23: linker, {20, 21, 22}, image
```
The changes in this patch pass the existent regression tests (keeps the existent functionality) and resulting binaries execute correctly in a Power8+K40 machine.
Reviewers: echristo, hfinkel, jlebar, ABataev, tra
Subscribers: guansong, andreybokhanko, tcramer, mkuron, cfe-commits, arpith-jacob, carlo.bertolli, caomhin
Differential Revision: https://reviews.llvm.org/D18171
llvm-svn: 275645
2016-07-16 07:13:27 +08:00
|
|
|
/// Mark this action (and, recursively, all of its inputs) as a device action
/// for offloading kind \p OKind targeting architecture \p OArch.
void Action::propagateDeviceOffloadInfo(OffloadKind OKind, const char *OArch) {
  // Stop at offload actions (they set kinds on their own dependences) and at
  // unbundling actions (those use the host kinds instead).
  if (Kind == OffloadClass || Kind == OffloadUnbundlingJobClass)
    return;

  // A device action may only ever carry a single device kind, and never any
  // host kinds.
  assert((OffloadingDeviceKind == OKind || OffloadingDeviceKind == OFK_None) &&
         "Setting device kind to a different device??");
  assert(!ActiveOffloadKindMask && "Setting a device kind in a host action??");

  OffloadingDeviceKind = OKind;
  OffloadingArch = OArch;

  // Push the same kind/arch down to every dependence.
  for (Action *Dep : Inputs)
    Dep->propagateDeviceOffloadInfo(OKind, OArch);
}
|
|
|
|
|
|
|
|
void Action::propagateHostOffloadInfo(unsigned OKinds, const char *OArch) {
|
|
|
|
// Offload action set its own kinds on their dependences.
|
|
|
|
if (Kind == OffloadClass)
|
|
|
|
return;
|
|
|
|
|
|
|
|
assert(OffloadingDeviceKind == OFK_None &&
|
|
|
|
"Setting a host kind in a device action.");
|
|
|
|
ActiveOffloadKindMask |= OKinds;
|
|
|
|
OffloadingArch = OArch;
|
|
|
|
|
|
|
|
for (auto *A : Inputs)
|
|
|
|
A->propagateHostOffloadInfo(ActiveOffloadKindMask, OArch);
|
|
|
|
}
|
|
|
|
|
|
|
|
void Action::propagateOffloadInfo(const Action *A) {
|
|
|
|
if (unsigned HK = A->getOffloadingHostActiveKinds())
|
|
|
|
propagateHostOffloadInfo(HK, A->getOffloadingArch());
|
|
|
|
else
|
|
|
|
propagateDeviceOffloadInfo(A->getOffloadingDeviceKind(),
|
|
|
|
A->getOffloadingArch());
|
|
|
|
}
|
|
|
|
|
|
|
|
std::string Action::getOffloadingKindPrefix() const {
|
|
|
|
switch (OffloadingDeviceKind) {
|
|
|
|
case OFK_None:
|
|
|
|
break;
|
|
|
|
case OFK_Host:
|
|
|
|
llvm_unreachable("Host kind is not an offloading device kind.");
|
|
|
|
break;
|
|
|
|
case OFK_Cuda:
|
|
|
|
return "device-cuda";
|
[Driver][OpenMP] Create tool chains for OpenMP offloading kind.
Summary: This patch adds new logic to create the necessary tool chains to support offloading for OpenMP. The OpenMP related options are checked and the tool chains created accordingly. Diagnostics are emitted in case the options are illegal or express unknown targets.
Reviewers: echristo, tra, jlebar, rsmith, ABataev, hfinkel
Subscribers: whchung, mkuron, mehdi_amini, cfe-commits, Hahnfeld, arpith-jacob, carlo.bertolli, caomhin
Differential Revision: https://reviews.llvm.org/D21843
llvm-svn: 285311
2016-10-28 00:38:05 +08:00
|
|
|
case OFK_OpenMP:
|
|
|
|
return "device-openmp";
|
[CUDA][OpenMP] Create generic offload action
Summary:
This patch replaces the CUDA specific action by a generic offload action. The offload action may have multiple dependences classier in “host” and “device”. The way this generic offloading action is used is very similar to what is done today by the CUDA implementation: it is used to set a specific toolchain and architecture to its dependences during the generation of jobs.
This patch also proposes propagating the offloading information through the action graph so that that information can be easily retrieved at any time during the generation of commands. This allows e.g. the "clang tool” to evaluate whether CUDA should be supported for the device or host and ptas to easily retrieve the target architecture.
This is an example of how the action graphs would look like (compilation of a single CUDA file with two GPU architectures)
```
0: input, "cudatests.cu", cuda, (host-cuda)
1: preprocessor, {0}, cuda-cpp-output, (host-cuda)
2: compiler, {1}, ir, (host-cuda)
3: input, "cudatests.cu", cuda, (device-cuda, sm_35)
4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_35)
5: compiler, {4}, ir, (device-cuda, sm_35)
6: backend, {5}, assembler, (device-cuda, sm_35)
7: assembler, {6}, object, (device-cuda, sm_35)
8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {7}, object
9: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {6}, assembler
10: input, "cudatests.cu", cuda, (device-cuda, sm_37)
11: preprocessor, {10}, cuda-cpp-output, (device-cuda, sm_37)
12: compiler, {11}, ir, (device-cuda, sm_37)
13: backend, {12}, assembler, (device-cuda, sm_37)
14: assembler, {13}, object, (device-cuda, sm_37)
15: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {14}, object
16: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {13}, assembler
17: linker, {8, 9, 15, 16}, cuda-fatbin, (device-cuda)
18: offload, "host-cuda (powerpc64le-unknown-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda)" {17}, ir
19: backend, {18}, assembler
20: assembler, {19}, object
21: input, "cuda", object
22: input, "cudart", object
23: linker, {20, 21, 22}, image
```
The changes in this patch pass the existent regression tests (keeps the existent functionality) and resulting binaries execute correctly in a Power8+K40 machine.
Reviewers: echristo, hfinkel, jlebar, ABataev, tra
Subscribers: guansong, andreybokhanko, tcramer, mkuron, cfe-commits, arpith-jacob, carlo.bertolli, caomhin
Differential Revision: https://reviews.llvm.org/D18171
llvm-svn: 275645
2016-07-16 07:13:27 +08:00
|
|
|
|
|
|
|
// TODO: Add other programming models here.
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!ActiveOffloadKindMask)
|
|
|
|
return "";
|
|
|
|
|
|
|
|
std::string Res("host");
|
|
|
|
if (ActiveOffloadKindMask & OFK_Cuda)
|
|
|
|
Res += "-cuda";
|
[Driver][OpenMP] Create tool chains for OpenMP offloading kind.
Summary: This patch adds new logic to create the necessary tool chains to support offloading for OpenMP. The OpenMP related options are checked and the tool chains created accordingly. Diagnostics are emitted in case the options are illegal or express unknown targets.
Reviewers: echristo, tra, jlebar, rsmith, ABataev, hfinkel
Subscribers: whchung, mkuron, mehdi_amini, cfe-commits, Hahnfeld, arpith-jacob, carlo.bertolli, caomhin
Differential Revision: https://reviews.llvm.org/D21843
llvm-svn: 285311
2016-10-28 00:38:05 +08:00
|
|
|
if (ActiveOffloadKindMask & OFK_OpenMP)
|
|
|
|
Res += "-openmp";
|
[CUDA][OpenMP] Create generic offload action
Summary:
This patch replaces the CUDA specific action by a generic offload action. The offload action may have multiple dependences classier in “host” and “device”. The way this generic offloading action is used is very similar to what is done today by the CUDA implementation: it is used to set a specific toolchain and architecture to its dependences during the generation of jobs.
This patch also proposes propagating the offloading information through the action graph so that that information can be easily retrieved at any time during the generation of commands. This allows e.g. the "clang tool” to evaluate whether CUDA should be supported for the device or host and ptas to easily retrieve the target architecture.
This is an example of how the action graphs would look like (compilation of a single CUDA file with two GPU architectures)
```
0: input, "cudatests.cu", cuda, (host-cuda)
1: preprocessor, {0}, cuda-cpp-output, (host-cuda)
2: compiler, {1}, ir, (host-cuda)
3: input, "cudatests.cu", cuda, (device-cuda, sm_35)
4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_35)
5: compiler, {4}, ir, (device-cuda, sm_35)
6: backend, {5}, assembler, (device-cuda, sm_35)
7: assembler, {6}, object, (device-cuda, sm_35)
8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {7}, object
9: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {6}, assembler
10: input, "cudatests.cu", cuda, (device-cuda, sm_37)
11: preprocessor, {10}, cuda-cpp-output, (device-cuda, sm_37)
12: compiler, {11}, ir, (device-cuda, sm_37)
13: backend, {12}, assembler, (device-cuda, sm_37)
14: assembler, {13}, object, (device-cuda, sm_37)
15: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {14}, object
16: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {13}, assembler
17: linker, {8, 9, 15, 16}, cuda-fatbin, (device-cuda)
18: offload, "host-cuda (powerpc64le-unknown-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda)" {17}, ir
19: backend, {18}, assembler
20: assembler, {19}, object
21: input, "cuda", object
22: input, "cudart", object
23: linker, {20, 21, 22}, image
```
The changes in this patch pass the existent regression tests (keeps the existent functionality) and resulting binaries execute correctly in a Power8+K40 machine.
Reviewers: echristo, hfinkel, jlebar, ABataev, tra
Subscribers: guansong, andreybokhanko, tcramer, mkuron, cfe-commits, arpith-jacob, carlo.bertolli, caomhin
Differential Revision: https://reviews.llvm.org/D18171
llvm-svn: 275645
2016-07-16 07:13:27 +08:00
|
|
|
|
|
|
|
// TODO: Add other programming models here.
|
|
|
|
|
|
|
|
return Res;
|
|
|
|
}
|
|
|
|
|
[Driver][OpenMP] Add support to create jobs for unbundling actions.
Summary:
This patch adds the support to create jobs for the `OffloadBundlingAction` which will invoke the `clang-offload-bundler` tool to unbundle input files.
Unlike other actions, unbundling actions have multiple outputs. Therefore, this patch adds the required changes to have a variant of `Tool::ConstructJob` with multiple outputs.
The way the naming of the results is implemented is also slightly modified so that the same action can use a different offloading prefix for each use by the different offloading actions.
With this patch, it is possible to compile a functional OpenMP binary with offloading support, even with separate compilation.
Reviewers: echristo, tra, jlebar, ABataev, hfinkel
Subscribers: mkuron, whchung, mehdi_amini, cfe-commits, Hahnfeld, andreybokhanko, arpith-jacob, carlo.bertolli, caomhin
Differential Revision: https://reviews.llvm.org/D21857
llvm-svn: 285326
2016-10-28 02:14:55 +08:00
|
|
|
/// Return a string that can be used as prefix in order to generate unique files
|
|
|
|
/// for each offloading kind.
|
[CUDA][OpenMP] Create generic offload action
Summary:
This patch replaces the CUDA specific action by a generic offload action. The offload action may have multiple dependences classier in “host” and “device”. The way this generic offloading action is used is very similar to what is done today by the CUDA implementation: it is used to set a specific toolchain and architecture to its dependences during the generation of jobs.
This patch also proposes propagating the offloading information through the action graph so that that information can be easily retrieved at any time during the generation of commands. This allows e.g. the "clang tool” to evaluate whether CUDA should be supported for the device or host and ptas to easily retrieve the target architecture.
This is an example of how the action graphs would look like (compilation of a single CUDA file with two GPU architectures)
```
0: input, "cudatests.cu", cuda, (host-cuda)
1: preprocessor, {0}, cuda-cpp-output, (host-cuda)
2: compiler, {1}, ir, (host-cuda)
3: input, "cudatests.cu", cuda, (device-cuda, sm_35)
4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_35)
5: compiler, {4}, ir, (device-cuda, sm_35)
6: backend, {5}, assembler, (device-cuda, sm_35)
7: assembler, {6}, object, (device-cuda, sm_35)
8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {7}, object
9: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {6}, assembler
10: input, "cudatests.cu", cuda, (device-cuda, sm_37)
11: preprocessor, {10}, cuda-cpp-output, (device-cuda, sm_37)
12: compiler, {11}, ir, (device-cuda, sm_37)
13: backend, {12}, assembler, (device-cuda, sm_37)
14: assembler, {13}, object, (device-cuda, sm_37)
15: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {14}, object
16: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {13}, assembler
17: linker, {8, 9, 15, 16}, cuda-fatbin, (device-cuda)
18: offload, "host-cuda (powerpc64le-unknown-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda)" {17}, ir
19: backend, {18}, assembler
20: assembler, {19}, object
21: input, "cuda", object
22: input, "cudart", object
23: linker, {20, 21, 22}, image
```
The changes in this patch pass the existent regression tests (keeps the existent functionality) and resulting binaries execute correctly in a Power8+K40 machine.
Reviewers: echristo, hfinkel, jlebar, ABataev, tra
Subscribers: guansong, andreybokhanko, tcramer, mkuron, cfe-commits, arpith-jacob, carlo.bertolli, caomhin
Differential Revision: https://reviews.llvm.org/D18171
llvm-svn: 275645
2016-07-16 07:13:27 +08:00
|
|
|
std::string
|
[Driver][OpenMP] Add support to create jobs for unbundling actions.
Summary:
This patch adds the support to create jobs for the `OffloadBundlingAction` which will invoke the `clang-offload-bundler` tool to unbundle input files.
Unlike other actions, unbundling actions have multiple outputs. Therefore, this patch adds the required changes to have a variant of `Tool::ConstructJob` with multiple outputs.
The way the naming of the results is implemented is also slightly modified so that the same action can use a different offloading prefix for each use by the different offloading actions.
With this patch, it is possible to compile a functional OpenMP binary with offloading support, even with separate compilation.
Reviewers: echristo, tra, jlebar, ABataev, hfinkel
Subscribers: mkuron, whchung, mehdi_amini, cfe-commits, Hahnfeld, andreybokhanko, arpith-jacob, carlo.bertolli, caomhin
Differential Revision: https://reviews.llvm.org/D21857
llvm-svn: 285326
2016-10-28 02:14:55 +08:00
|
|
|
Action::GetOffloadingFileNamePrefix(OffloadKind Kind,
|
|
|
|
llvm::StringRef NormalizedTriple,
|
|
|
|
bool CreatePrefixForHost) {
|
|
|
|
// Don't generate prefix for host actions unless required.
|
|
|
|
if (!CreatePrefixForHost && (Kind == OFK_None || Kind == OFK_Host))
|
[CUDA][OpenMP] Create generic offload action
Summary:
This patch replaces the CUDA specific action by a generic offload action. The offload action may have multiple dependences classier in “host” and “device”. The way this generic offloading action is used is very similar to what is done today by the CUDA implementation: it is used to set a specific toolchain and architecture to its dependences during the generation of jobs.
This patch also proposes propagating the offloading information through the action graph so that that information can be easily retrieved at any time during the generation of commands. This allows e.g. the "clang tool” to evaluate whether CUDA should be supported for the device or host and ptas to easily retrieve the target architecture.
This is an example of how the action graphs would look like (compilation of a single CUDA file with two GPU architectures)
```
0: input, "cudatests.cu", cuda, (host-cuda)
1: preprocessor, {0}, cuda-cpp-output, (host-cuda)
2: compiler, {1}, ir, (host-cuda)
3: input, "cudatests.cu", cuda, (device-cuda, sm_35)
4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_35)
5: compiler, {4}, ir, (device-cuda, sm_35)
6: backend, {5}, assembler, (device-cuda, sm_35)
7: assembler, {6}, object, (device-cuda, sm_35)
8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {7}, object
9: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {6}, assembler
10: input, "cudatests.cu", cuda, (device-cuda, sm_37)
11: preprocessor, {10}, cuda-cpp-output, (device-cuda, sm_37)
12: compiler, {11}, ir, (device-cuda, sm_37)
13: backend, {12}, assembler, (device-cuda, sm_37)
14: assembler, {13}, object, (device-cuda, sm_37)
15: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {14}, object
16: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {13}, assembler
17: linker, {8, 9, 15, 16}, cuda-fatbin, (device-cuda)
18: offload, "host-cuda (powerpc64le-unknown-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda)" {17}, ir
19: backend, {18}, assembler
20: assembler, {19}, object
21: input, "cuda", object
22: input, "cudart", object
23: linker, {20, 21, 22}, image
```
The changes in this patch pass the existent regression tests (keeps the existent functionality) and resulting binaries execute correctly in a Power8+K40 machine.
Reviewers: echristo, hfinkel, jlebar, ABataev, tra
Subscribers: guansong, andreybokhanko, tcramer, mkuron, cfe-commits, arpith-jacob, carlo.bertolli, caomhin
Differential Revision: https://reviews.llvm.org/D18171
llvm-svn: 275645
2016-07-16 07:13:27 +08:00
|
|
|
return "";
|
|
|
|
|
|
|
|
std::string Res("-");
|
[Driver][OpenMP] Add support to create jobs for unbundling actions.
Summary:
This patch adds the support to create jobs for the `OffloadBundlingAction` which will invoke the `clang-offload-bundler` tool to unbundle input files.
Unlike other actions, unbundling actions have multiple outputs. Therefore, this patch adds the required changes to have a variant of `Tool::ConstructJob` with multiple outputs.
The way the naming of the results is implemented is also slightly modified so that the same action can use a different offloading prefix for each use by the different offloading actions.
With this patch, it is possible to compile a functional OpenMP binary with offloading support, even with separate compilation.
Reviewers: echristo, tra, jlebar, ABataev, hfinkel
Subscribers: mkuron, whchung, mehdi_amini, cfe-commits, Hahnfeld, andreybokhanko, arpith-jacob, carlo.bertolli, caomhin
Differential Revision: https://reviews.llvm.org/D21857
llvm-svn: 285326
2016-10-28 02:14:55 +08:00
|
|
|
Res += GetOffloadKindName(Kind);
|
[CUDA][OpenMP] Create generic offload action
Summary:
This patch replaces the CUDA specific action by a generic offload action. The offload action may have multiple dependences classier in “host” and “device”. The way this generic offloading action is used is very similar to what is done today by the CUDA implementation: it is used to set a specific toolchain and architecture to its dependences during the generation of jobs.
This patch also proposes propagating the offloading information through the action graph so that that information can be easily retrieved at any time during the generation of commands. This allows e.g. the "clang tool” to evaluate whether CUDA should be supported for the device or host and ptas to easily retrieve the target architecture.
This is an example of how the action graphs would look like (compilation of a single CUDA file with two GPU architectures)
```
0: input, "cudatests.cu", cuda, (host-cuda)
1: preprocessor, {0}, cuda-cpp-output, (host-cuda)
2: compiler, {1}, ir, (host-cuda)
3: input, "cudatests.cu", cuda, (device-cuda, sm_35)
4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_35)
5: compiler, {4}, ir, (device-cuda, sm_35)
6: backend, {5}, assembler, (device-cuda, sm_35)
7: assembler, {6}, object, (device-cuda, sm_35)
8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {7}, object
9: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {6}, assembler
10: input, "cudatests.cu", cuda, (device-cuda, sm_37)
11: preprocessor, {10}, cuda-cpp-output, (device-cuda, sm_37)
12: compiler, {11}, ir, (device-cuda, sm_37)
13: backend, {12}, assembler, (device-cuda, sm_37)
14: assembler, {13}, object, (device-cuda, sm_37)
15: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {14}, object
16: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {13}, assembler
17: linker, {8, 9, 15, 16}, cuda-fatbin, (device-cuda)
18: offload, "host-cuda (powerpc64le-unknown-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda)" {17}, ir
19: backend, {18}, assembler
20: assembler, {19}, object
21: input, "cuda", object
22: input, "cudart", object
23: linker, {20, 21, 22}, image
```
The changes in this patch pass the existent regression tests (keeps the existent functionality) and resulting binaries execute correctly in a Power8+K40 machine.
Reviewers: echristo, hfinkel, jlebar, ABataev, tra
Subscribers: guansong, andreybokhanko, tcramer, mkuron, cfe-commits, arpith-jacob, carlo.bertolli, caomhin
Differential Revision: https://reviews.llvm.org/D18171
llvm-svn: 275645
2016-07-16 07:13:27 +08:00
|
|
|
Res += "-";
|
|
|
|
Res += NormalizedTriple;
|
|
|
|
return Res;
|
|
|
|
}
|
|
|
|
|
[Driver][OpenMP] Add support to create jobs for bundling actions.
Summary: This patch adds the support to create a job for the `OffloadBundlingAction` which will invoke the `clang-offload-bundler` tool.
Reviewers: echristo, tra, jlebar, ABataev, hfinkel
Subscribers: whchung, mehdi_amini, cfe-commits, Hahnfeld, andreybokhanko, arpith-jacob, carlo.bertolli, caomhin
Differential Revision: https://reviews.llvm.org/D21856
llvm-svn: 285325
2016-10-28 02:04:42 +08:00
|
|
|
/// Return a string with the offload kind name. If that is not defined, we
|
|
|
|
/// assume 'host'.
|
|
|
|
llvm::StringRef Action::GetOffloadKindName(OffloadKind Kind) {
|
|
|
|
switch (Kind) {
|
|
|
|
case OFK_None:
|
|
|
|
case OFK_Host:
|
|
|
|
return "host";
|
|
|
|
case OFK_Cuda:
|
|
|
|
return "cuda";
|
|
|
|
case OFK_OpenMP:
|
|
|
|
return "openmp";
|
|
|
|
|
|
|
|
// TODO: Add other programming models here.
|
|
|
|
}
|
2016-10-28 18:09:35 +08:00
|
|
|
|
|
|
|
llvm_unreachable("invalid offload kind");
|
[Driver][OpenMP] Add support to create jobs for bundling actions.
Summary: This patch adds the support to create a job for the `OffloadBundlingAction` which will invoke the `clang-offload-bundler` tool.
Reviewers: echristo, tra, jlebar, ABataev, hfinkel
Subscribers: whchung, mehdi_amini, cfe-commits, Hahnfeld, andreybokhanko, arpith-jacob, carlo.bertolli, caomhin
Differential Revision: https://reviews.llvm.org/D21856
llvm-svn: 285325
2016-10-28 02:04:42 +08:00
|
|
|
}
|
|
|
|
|
2011-12-20 10:48:34 +08:00
|
|
|
void InputAction::anchor() {}
|
|
|
|
|
2009-09-09 23:08:12 +08:00
|
|
|
InputAction::InputAction(const Arg &_Input, types::ID _Type)
|
2009-03-14 07:08:03 +08:00
|
|
|
: Action(InputClass, _Type), Input(_Input) {
|
|
|
|
}
|
|
|
|
|
2011-12-20 10:48:34 +08:00
|
|
|
void BindArchAction::anchor() {}
|
|
|
|
|
2016-10-08 06:03:03 +08:00
|
|
|
BindArchAction::BindArchAction(Action *Input, llvm::StringRef ArchName)
|
|
|
|
: Action(BindArchClass, Input), ArchName(ArchName) {}
|
2009-03-14 07:08:03 +08:00
|
|
|
|
[CUDA][OpenMP] Create generic offload action
Summary:
This patch replaces the CUDA specific action by a generic offload action. The offload action may have multiple dependences classier in “host” and “device”. The way this generic offloading action is used is very similar to what is done today by the CUDA implementation: it is used to set a specific toolchain and architecture to its dependences during the generation of jobs.
This patch also proposes propagating the offloading information through the action graph so that that information can be easily retrieved at any time during the generation of commands. This allows e.g. the "clang tool” to evaluate whether CUDA should be supported for the device or host and ptas to easily retrieve the target architecture.
This is an example of how the action graphs would look like (compilation of a single CUDA file with two GPU architectures)
```
0: input, "cudatests.cu", cuda, (host-cuda)
1: preprocessor, {0}, cuda-cpp-output, (host-cuda)
2: compiler, {1}, ir, (host-cuda)
3: input, "cudatests.cu", cuda, (device-cuda, sm_35)
4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_35)
5: compiler, {4}, ir, (device-cuda, sm_35)
6: backend, {5}, assembler, (device-cuda, sm_35)
7: assembler, {6}, object, (device-cuda, sm_35)
8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {7}, object
9: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {6}, assembler
10: input, "cudatests.cu", cuda, (device-cuda, sm_37)
11: preprocessor, {10}, cuda-cpp-output, (device-cuda, sm_37)
12: compiler, {11}, ir, (device-cuda, sm_37)
13: backend, {12}, assembler, (device-cuda, sm_37)
14: assembler, {13}, object, (device-cuda, sm_37)
15: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {14}, object
16: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {13}, assembler
17: linker, {8, 9, 15, 16}, cuda-fatbin, (device-cuda)
18: offload, "host-cuda (powerpc64le-unknown-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda)" {17}, ir
19: backend, {18}, assembler
20: assembler, {19}, object
21: input, "cuda", object
22: input, "cudart", object
23: linker, {20, 21, 22}, image
```
The changes in this patch pass the existent regression tests (keeps the existent functionality) and resulting binaries execute correctly in a Power8+K40 machine.
Reviewers: echristo, hfinkel, jlebar, ABataev, tra
Subscribers: guansong, andreybokhanko, tcramer, mkuron, cfe-commits, arpith-jacob, carlo.bertolli, caomhin
Differential Revision: https://reviews.llvm.org/D18171
llvm-svn: 275645
2016-07-16 07:13:27 +08:00
|
|
|
void OffloadAction::anchor() {}
|
|
|
|
|
|
|
|
OffloadAction::OffloadAction(const HostDependence &HDep)
|
|
|
|
: Action(OffloadClass, HDep.getAction()), HostTC(HDep.getToolChain()) {
|
|
|
|
OffloadingArch = HDep.getBoundArch();
|
|
|
|
ActiveOffloadKindMask = HDep.getOffloadKinds();
|
|
|
|
HDep.getAction()->propagateHostOffloadInfo(HDep.getOffloadKinds(),
|
|
|
|
HDep.getBoundArch());
|
2016-07-16 08:58:34 +08:00
|
|
|
}
|
2015-07-14 07:27:56 +08:00
|
|
|
|
[CUDA][OpenMP] Create generic offload action
Summary:
This patch replaces the CUDA specific action by a generic offload action. The offload action may have multiple dependences classier in “host” and “device”. The way this generic offloading action is used is very similar to what is done today by the CUDA implementation: it is used to set a specific toolchain and architecture to its dependences during the generation of jobs.
This patch also proposes propagating the offloading information through the action graph so that that information can be easily retrieved at any time during the generation of commands. This allows e.g. the "clang tool” to evaluate whether CUDA should be supported for the device or host and ptas to easily retrieve the target architecture.
This is an example of how the action graphs would look like (compilation of a single CUDA file with two GPU architectures)
```
0: input, "cudatests.cu", cuda, (host-cuda)
1: preprocessor, {0}, cuda-cpp-output, (host-cuda)
2: compiler, {1}, ir, (host-cuda)
3: input, "cudatests.cu", cuda, (device-cuda, sm_35)
4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_35)
5: compiler, {4}, ir, (device-cuda, sm_35)
6: backend, {5}, assembler, (device-cuda, sm_35)
7: assembler, {6}, object, (device-cuda, sm_35)
8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {7}, object
9: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {6}, assembler
10: input, "cudatests.cu", cuda, (device-cuda, sm_37)
11: preprocessor, {10}, cuda-cpp-output, (device-cuda, sm_37)
12: compiler, {11}, ir, (device-cuda, sm_37)
13: backend, {12}, assembler, (device-cuda, sm_37)
14: assembler, {13}, object, (device-cuda, sm_37)
15: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {14}, object
16: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {13}, assembler
17: linker, {8, 9, 15, 16}, cuda-fatbin, (device-cuda)
18: offload, "host-cuda (powerpc64le-unknown-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda)" {17}, ir
19: backend, {18}, assembler
20: assembler, {19}, object
21: input, "cuda", object
22: input, "cudart", object
23: linker, {20, 21, 22}, image
```
The changes in this patch pass the existent regression tests (keeps the existent functionality) and resulting binaries execute correctly in a Power8+K40 machine.
Reviewers: echristo, hfinkel, jlebar, ABataev, tra
Subscribers: guansong, andreybokhanko, tcramer, mkuron, cfe-commits, arpith-jacob, carlo.bertolli, caomhin
Differential Revision: https://reviews.llvm.org/D18171
llvm-svn: 275645
2016-07-16 07:13:27 +08:00
|
|
|
// Build an offload action from device-side dependences only.
// NOTE(review): this assumes \p DDeps carries at least one dependence; with
// an empty kind list the front() accesses below would be invalid — confirm
// callers guarantee this.
OffloadAction::OffloadAction(const DeviceDependences &DDeps, types::ID Ty)
    : Action(OffloadClass, DDeps.getActions(), Ty),
      DevToolChains(DDeps.getToolChains()) {
  auto &Kinds = DDeps.getOffloadKinds();
  auto &Archs = DDeps.getBoundArchs();

  // If all inputs agree on the same kind, use it also for this action.
  if (llvm::all_of(Kinds, [&](OffloadKind K) { return K == Kinds.front(); }))
    OffloadingDeviceKind = Kinds.front();

  // If we have a single dependency, inherit the architecture from it.
  if (Kinds.size() == 1)
    OffloadingArch = Archs.front();

  // Hand each device dependence its own offload kind and bound architecture.
  for (unsigned I = 0, E = getInputs().size(); I != E; ++I)
    getInputs()[I]->propagateDeviceOffloadInfo(Kinds[I], Archs[I]);
}
|
2015-07-14 07:27:56 +08:00
|
|
|
|
[CUDA][OpenMP] Create generic offload action
Summary:
This patch replaces the CUDA specific action by a generic offload action. The offload action may have multiple dependences classier in “host” and “device”. The way this generic offloading action is used is very similar to what is done today by the CUDA implementation: it is used to set a specific toolchain and architecture to its dependences during the generation of jobs.
This patch also proposes propagating the offloading information through the action graph so that that information can be easily retrieved at any time during the generation of commands. This allows e.g. the "clang tool” to evaluate whether CUDA should be supported for the device or host and ptas to easily retrieve the target architecture.
This is an example of how the action graphs would look like (compilation of a single CUDA file with two GPU architectures)
```
0: input, "cudatests.cu", cuda, (host-cuda)
1: preprocessor, {0}, cuda-cpp-output, (host-cuda)
2: compiler, {1}, ir, (host-cuda)
3: input, "cudatests.cu", cuda, (device-cuda, sm_35)
4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_35)
5: compiler, {4}, ir, (device-cuda, sm_35)
6: backend, {5}, assembler, (device-cuda, sm_35)
7: assembler, {6}, object, (device-cuda, sm_35)
8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {7}, object
9: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {6}, assembler
10: input, "cudatests.cu", cuda, (device-cuda, sm_37)
11: preprocessor, {10}, cuda-cpp-output, (device-cuda, sm_37)
12: compiler, {11}, ir, (device-cuda, sm_37)
13: backend, {12}, assembler, (device-cuda, sm_37)
14: assembler, {13}, object, (device-cuda, sm_37)
15: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {14}, object
16: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {13}, assembler
17: linker, {8, 9, 15, 16}, cuda-fatbin, (device-cuda)
18: offload, "host-cuda (powerpc64le-unknown-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda)" {17}, ir
19: backend, {18}, assembler
20: assembler, {19}, object
21: input, "cuda", object
22: input, "cudart", object
23: linker, {20, 21, 22}, image
```
The changes in this patch pass the existent regression tests (keeps the existent functionality) and resulting binaries execute correctly in a Power8+K40 machine.
Reviewers: echristo, hfinkel, jlebar, ABataev, tra
Subscribers: guansong, andreybokhanko, tcramer, mkuron, cfe-commits, arpith-jacob, carlo.bertolli, caomhin
Differential Revision: https://reviews.llvm.org/D18171
llvm-svn: 275645
2016-07-16 07:13:27 +08:00
|
|
|
// Build an offload action with both a host dependence and a set of device
// dependences. The host action always comes first in the input list.
OffloadAction::OffloadAction(const HostDependence &HDep,
                             const DeviceDependences &DDeps)
    : Action(OffloadClass, HDep.getAction()), HostTC(HDep.getToolChain()),
      DevToolChains(DDeps.getToolChains()) {
  // We use the kinds of the host dependence for this action.
  OffloadingArch = HDep.getBoundArch();
  ActiveOffloadKindMask = HDep.getOffloadKinds();
  HDep.getAction()->propagateHostOffloadInfo(HDep.getOffloadKinds(),
                                             HDep.getBoundArch());

  // Append the (non-null) device actions after the host action and propagate
  // each one's offload kind and bound architecture down to it.
  for (unsigned I = 0, E = DDeps.getActions().size(); I != E; ++I) {
    Action *DevAction = DDeps.getActions()[I];
    if (!DevAction)
      continue;
    getInputs().push_back(DevAction);
    DevAction->propagateDeviceOffloadInfo(DDeps.getOffloadKinds()[I],
                                          DDeps.getBoundArchs()[I]);
  }
}
|
|
|
|
|
|
|
|
void OffloadAction::doOnHostDependence(const OffloadActionWorkTy &Work) const {
|
|
|
|
if (!HostTC)
|
|
|
|
return;
|
|
|
|
assert(!getInputs().empty() && "No dependencies for offload action??");
|
|
|
|
auto *A = getInputs().front();
|
|
|
|
Work(A, HostTC, A->getOffloadingArch());
|
|
|
|
}
|
|
|
|
|
|
|
|
void OffloadAction::doOnEachDeviceDependence(
|
|
|
|
const OffloadActionWorkTy &Work) const {
|
|
|
|
auto I = getInputs().begin();
|
|
|
|
auto E = getInputs().end();
|
|
|
|
if (I == E)
|
|
|
|
return;
|
|
|
|
|
|
|
|
// We expect to have the same number of input dependences and device tool
|
|
|
|
// chains, except if we also have a host dependence. In that case we have one
|
|
|
|
// more dependence than we have device tool chains.
|
|
|
|
assert(getInputs().size() == DevToolChains.size() + (HostTC ? 1 : 0) &&
|
|
|
|
"Sizes of action dependences and toolchains are not consistent!");
|
|
|
|
|
|
|
|
// Skip host action
|
|
|
|
if (HostTC)
|
|
|
|
++I;
|
|
|
|
|
|
|
|
auto TI = DevToolChains.begin();
|
|
|
|
for (; I != E; ++I, ++TI)
|
|
|
|
Work(*I, *TI, (*I)->getOffloadingArch());
|
|
|
|
}
|
|
|
|
|
|
|
|
// Run \p Work over the host dependence (if any) followed by every device
// dependence.
void OffloadAction::doOnEachDependence(const OffloadActionWorkTy &Work) const {
  doOnHostDependence(Work);
  doOnEachDeviceDependence(Work);
}
|
|
|
|
|
|
|
|
void OffloadAction::doOnEachDependence(bool IsHostDependence,
|
|
|
|
const OffloadActionWorkTy &Work) const {
|
|
|
|
if (IsHostDependence)
|
|
|
|
doOnHostDependence(Work);
|
|
|
|
else
|
|
|
|
doOnEachDeviceDependence(Work);
|
|
|
|
}
|
|
|
|
|
|
|
|
// An offload action has a host dependence iff a host tool chain was recorded
// at construction time.
bool OffloadAction::hasHostDependence() const { return HostTC != nullptr; }
|
|
|
|
|
|
|
|
// Return the host action this offload action depends on. Only meaningful
// when hasHostDependence() holds; the host action is always the first input.
Action *OffloadAction::getHostDependence() const {
  assert(hasHostDependence() && "Host dependence does not exist!");
  assert(!getInputs().empty() && "No dependencies for offload action??");
  // Guard with HostTC so release builds (asserts off) still return null
  // rather than a device action when no host dependence exists.
  return HostTC ? getInputs().front() : nullptr;
}
|
|
|
|
|
|
|
|
bool OffloadAction::hasSingleDeviceDependence(
|
|
|
|
bool DoNotConsiderHostActions) const {
|
|
|
|
if (DoNotConsiderHostActions)
|
|
|
|
return getInputs().size() == (HostTC ? 2 : 1);
|
|
|
|
return !HostTC && getInputs().size() == 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Return the single device dependence. Valid only when
// hasSingleDeviceDependence(DoNotConsiderHostActions) is true.
Action *
OffloadAction::getSingleDeviceDependence(bool DoNotConsiderHostActions) const {
  assert(hasSingleDeviceDependence(DoNotConsiderHostActions) &&
         "Single device dependence does not exist!");
  // The previous assert ensures the number of entries in getInputs() is
  // consistent with what we are doing here: the device action sits after the
  // host action when one exists, otherwise it is the sole input.
  return HostTC ? getInputs()[1] : getInputs().front();
}
|
|
|
|
|
|
|
|
// Record one device dependence: the action, its tool chain, the architecture
// it is bound to (may be null), and its offload kind. The four member
// vectors stay index-aligned, one entry per dependence.
void OffloadAction::DeviceDependences::add(Action &A, const ToolChain &TC,
                                           const char *BoundArch,
                                           OffloadKind OKind) {
  DeviceActions.push_back(&A);
  DeviceToolChains.push_back(&TC);
  DeviceBoundArchs.push_back(BoundArch);
  DeviceOffloadKinds.push_back(OKind);
}
|
|
|
|
|
|
|
|
// A host dependence records the host action, its tool chain and bound
// architecture. The host must know about every offload kind its device
// dependences use, so all device kinds are folded into one mask.
OffloadAction::HostDependence::HostDependence(Action &A, const ToolChain &TC,
                                              const char *BoundArch,
                                              const DeviceDependences &DDeps)
    : HostAction(A), HostToolChain(TC), HostBoundArch(BoundArch) {
  for (OffloadKind K : DDeps.getOffloadKinds())
    HostOffloadKinds |= K;
}
|
2015-07-14 07:27:56 +08:00
|
|
|
|
2011-12-20 10:48:34 +08:00
|
|
|
void JobAction::anchor() {}
|
|
|
|
|
2016-01-12 07:07:27 +08:00
|
|
|
JobAction::JobAction(ActionClass Kind, Action *Input, types::ID Type)
|
|
|
|
: Action(Kind, Input, Type) {}
|
2009-03-14 07:08:03 +08:00
|
|
|
|
2009-09-09 23:08:12 +08:00
|
|
|
JobAction::JobAction(ActionClass Kind, const ActionList &Inputs, types::ID Type)
|
2009-03-14 07:08:03 +08:00
|
|
|
: Action(Kind, Inputs, Type) {
|
|
|
|
}
|
|
|
|
|
2011-12-20 10:48:34 +08:00
|
|
|
void PreprocessJobAction::anchor() {}
|
|
|
|
|
2016-01-12 07:07:27 +08:00
|
|
|
PreprocessJobAction::PreprocessJobAction(Action *Input, types::ID OutputType)
|
|
|
|
: JobAction(PreprocessJobClass, Input, OutputType) {}
|
2009-03-14 07:08:03 +08:00
|
|
|
|
2011-12-20 10:48:34 +08:00
|
|
|
void PrecompileJobAction::anchor() {}
|
|
|
|
|
2016-01-12 07:07:27 +08:00
|
|
|
PrecompileJobAction::PrecompileJobAction(Action *Input, types::ID OutputType)
|
|
|
|
: JobAction(PrecompileJobClass, Input, OutputType) {}
|
2009-03-14 07:08:03 +08:00
|
|
|
|
2011-12-20 10:48:34 +08:00
|
|
|
void AnalyzeJobAction::anchor() {}
|
|
|
|
|
2016-01-12 07:07:27 +08:00
|
|
|
AnalyzeJobAction::AnalyzeJobAction(Action *Input, types::ID OutputType)
|
|
|
|
: JobAction(AnalyzeJobClass, Input, OutputType) {}
|
2009-03-14 07:08:03 +08:00
|
|
|
|
2012-03-07 04:06:33 +08:00
|
|
|
void MigrateJobAction::anchor() {}
|
|
|
|
|
2016-01-12 07:07:27 +08:00
|
|
|
MigrateJobAction::MigrateJobAction(Action *Input, types::ID OutputType)
|
|
|
|
: JobAction(MigrateJobClass, Input, OutputType) {}
|
2012-03-07 04:06:33 +08:00
|
|
|
|
2011-12-20 10:48:34 +08:00
|
|
|
void CompileJobAction::anchor() {}
|
|
|
|
|
2016-01-12 07:07:27 +08:00
|
|
|
CompileJobAction::CompileJobAction(Action *Input, types::ID OutputType)
|
|
|
|
: JobAction(CompileJobClass, Input, OutputType) {}
|
2009-03-14 07:08:03 +08:00
|
|
|
|
Reapply "Change -save-temps to emit unoptimized bitcode files."
This reapplies r224503 along with a fix for compiling Fortran by having the
clang driver invoke gcc (see r224546, where it was reverted). I have added
a testcase for that as well.
Original commit message:
It is often convenient to use -save-temps to collect the intermediate
results of a compilation, e.g., when triaging a bug report. Besides the
temporary files for preprocessed source and assembly code, this adds the
unoptimized bitcode files as well.
This adds a new BackendJobAction, which is mostly mechanical, to run after
the CompileJobAction. When not using -save-temps, the BackendJobAction is
combined into one job with the CompileJobAction, similar to the way the
integrated assembler is handled. I've implemented this entirely as a
driver change, so under the hood, it is just using -disable-llvm-optzns
to get the unoptimized bitcode.
Based in part on a patch by Steven Wu.
rdar://problem/18909437
llvm-svn: 224688
2014-12-21 15:00:00 +08:00
|
|
|
void BackendJobAction::anchor() {}
|
|
|
|
|
2016-01-12 07:07:27 +08:00
|
|
|
BackendJobAction::BackendJobAction(Action *Input, types::ID OutputType)
|
|
|
|
: JobAction(BackendJobClass, Input, OutputType) {}
|
Reapply "Change -save-temps to emit unoptimized bitcode files."
This reapplies r224503 along with a fix for compiling Fortran by having the
clang driver invoke gcc (see r224546, where it was reverted). I have added
a testcase for that as well.
Original commit message:
It is often convenient to use -save-temps to collect the intermediate
results of a compilation, e.g., when triaging a bug report. Besides the
temporary files for preprocessed source and assembly code, this adds the
unoptimized bitcode files as well.
This adds a new BackendJobAction, which is mostly mechanical, to run after
the CompileJobAction. When not using -save-temps, the BackendJobAction is
combined into one job with the CompileJobAction, similar to the way the
integrated assembler is handled. I've implemented this entirely as a
driver change, so under the hood, it is just using -disable-llvm-optzns
to get the unoptimized bitcode.
Based in part on a patch by Steven Wu.
rdar://problem/18909437
llvm-svn: 224688
2014-12-21 15:00:00 +08:00
|
|
|
|
2011-12-20 10:48:34 +08:00
|
|
|
void AssembleJobAction::anchor() {}
|
|
|
|
|
2016-01-12 07:07:27 +08:00
|
|
|
AssembleJobAction::AssembleJobAction(Action *Input, types::ID OutputType)
|
|
|
|
: JobAction(AssembleJobClass, Input, OutputType) {}
|
2009-03-14 07:08:03 +08:00
|
|
|
|
2011-12-20 10:48:34 +08:00
|
|
|
void LinkJobAction::anchor() {}
|
|
|
|
|
2009-09-09 23:08:12 +08:00
|
|
|
LinkJobAction::LinkJobAction(ActionList &Inputs, types::ID Type)
|
2009-03-14 07:08:03 +08:00
|
|
|
: JobAction(LinkJobClass, Inputs, Type) {
|
|
|
|
}
|
|
|
|
|
2011-12-20 10:48:34 +08:00
|
|
|
void LipoJobAction::anchor() {}
|
|
|
|
|
2009-09-09 23:08:12 +08:00
|
|
|
LipoJobAction::LipoJobAction(ActionList &Inputs, types::ID Type)
|
2009-03-14 07:08:03 +08:00
|
|
|
: JobAction(LipoJobClass, Inputs, Type) {
|
|
|
|
}
|
2010-06-05 02:28:36 +08:00
|
|
|
|
2011-12-20 10:48:34 +08:00
|
|
|
void DsymutilJobAction::anchor() {}
|
|
|
|
|
2010-06-05 02:28:36 +08:00
|
|
|
DsymutilJobAction::DsymutilJobAction(ActionList &Inputs, types::ID Type)
|
|
|
|
: JobAction(DsymutilJobClass, Inputs, Type) {
|
|
|
|
}
|
2011-08-24 01:56:55 +08:00
|
|
|
|
2011-12-20 10:48:34 +08:00
|
|
|
void VerifyJobAction::anchor() {}
|
|
|
|
|
2016-01-12 07:07:27 +08:00
|
|
|
VerifyJobAction::VerifyJobAction(ActionClass Kind, Action *Input,
|
|
|
|
types::ID Type)
|
|
|
|
: JobAction(Kind, Input, Type) {
|
2014-02-07 02:53:25 +08:00
|
|
|
assert((Kind == VerifyDebugInfoJobClass || Kind == VerifyPCHJobClass) &&
|
|
|
|
"ActionClass is not a valid VerifyJobAction");
|
|
|
|
}
|
|
|
|
|
|
|
|
void VerifyDebugInfoJobAction::anchor() {}
|
|
|
|
|
2016-01-12 07:07:27 +08:00
|
|
|
VerifyDebugInfoJobAction::VerifyDebugInfoJobAction(Action *Input,
|
|
|
|
types::ID Type)
|
|
|
|
: VerifyJobAction(VerifyDebugInfoJobClass, Input, Type) {}
|
2014-02-07 02:53:25 +08:00
|
|
|
|
|
|
|
void VerifyPCHJobAction::anchor() {}
|
|
|
|
|
2016-01-12 07:07:27 +08:00
|
|
|
VerifyPCHJobAction::VerifyPCHJobAction(Action *Input, types::ID Type)
|
|
|
|
: VerifyJobAction(VerifyPCHJobClass, Input, Type) {}
|
[Driver][OpenMP] Update actions builder to create bundling action when necessary.
Summary:
In order to save the user from dealing with multiple output files (for host and device) while using separate compilation, a new action `OffloadBundlingAction` is used when the last phase is not linking. This action will then result in a job that uses the proposed bundling tool to create a single preprocessed/IR/ASM/Object file from multiple ones.
The job creation for the new action will be proposed in a separate patch.
Reviewers: echristo, tra, jlebar, ABataev, hfinkel
Subscribers: whchung, mehdi_amini, cfe-commits, Hahnfeld, andreybokhanko, arpith-jacob, carlo.bertolli, caomhin
Differential Revision: https://reviews.llvm.org/D21852
llvm-svn: 285323
2016-10-28 01:50:43 +08:00
|
|
|
|
|
|
|
void OffloadBundlingJobAction::anchor() {}
|
|
|
|
|
|
|
|
OffloadBundlingJobAction::OffloadBundlingJobAction(ActionList &Inputs)
|
|
|
|
: JobAction(OffloadBundlingJobClass, Inputs, Inputs.front()->getType()) {}
|
[Driver][OpenMP] Update actions builder to create unbundling action when necessary.
Summary:
Each time that offloading support is requested by the user and the input file is not a source file, an action `OffloadUnbundlingAction` is created to signal that the input file may contain bundles, so that the proper tool is then invoked to attempt to extract the components of the bundle. This patch adds the logic to create that action in offload action builder.
The job creation for the new action will be proposed in a separate patch.
Reviewers: echristo, tra, jlebar, ABataev, hfinkel
Subscribers: whchung, mehdi_amini, cfe-commits, Hahnfeld, andreybokhanko, arpith-jacob, carlo.bertolli, caomhin
Differential Revision: https://reviews.llvm.org/D21853
llvm-svn: 285324
2016-10-28 02:00:51 +08:00
|
|
|
|
|
|
|
void OffloadUnbundlingJobAction::anchor() {}
|
|
|
|
|
|
|
|
OffloadUnbundlingJobAction::OffloadUnbundlingJobAction(Action *Input)
|
|
|
|
: JobAction(OffloadUnbundlingJobClass, Input, Input->getType()) {}
|