2010-09-24 07:48:20 +08:00
|
|
|
//===--- Driver.cpp - Clang GCC Compatible Driver -------------------------===//
|
2009-03-03 03:59:07 +08:00
|
|
|
//
|
|
|
|
// The LLVM Compiler Infrastructure
|
|
|
|
//
|
|
|
|
// This file is distributed under the University of Illinois Open Source
|
|
|
|
// License. See LICENSE.TXT for details.
|
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
#include "clang/Driver/Driver.h"
|
2012-12-04 17:13:33 +08:00
|
|
|
#include "InputInfo.h"
|
|
|
|
#include "ToolChains.h"
|
|
|
|
#include "clang/Basic/Version.h"
|
2015-10-07 23:48:01 +08:00
|
|
|
#include "clang/Basic/VirtualFileSystem.h"
|
2014-06-04 11:28:55 +08:00
|
|
|
#include "clang/Config/config.h"
|
2009-03-12 15:58:46 +08:00
|
|
|
#include "clang/Driver/Action.h"
|
2009-03-05 04:49:20 +08:00
|
|
|
#include "clang/Driver/Compilation.h"
|
2009-03-12 16:55:43 +08:00
|
|
|
#include "clang/Driver/DriverDiagnostic.h"
|
2009-03-16 14:56:51 +08:00
|
|
|
#include "clang/Driver/Job.h"
|
2009-03-05 04:49:20 +08:00
|
|
|
#include "clang/Driver/Options.h"
|
2015-02-21 04:30:56 +08:00
|
|
|
#include "clang/Driver/SanitizerArgs.h"
|
2009-03-16 14:56:51 +08:00
|
|
|
#include "clang/Driver/Tool.h"
|
|
|
|
#include "clang/Driver/ToolChain.h"
|
2011-03-23 12:04:01 +08:00
|
|
|
#include "llvm/ADT/ArrayRef.h"
|
2013-07-27 08:23:45 +08:00
|
|
|
#include "llvm/ADT/STLExtras.h"
|
2016-07-07 05:21:39 +08:00
|
|
|
#include "llvm/ADT/SmallSet.h"
|
2014-06-19 01:21:50 +08:00
|
|
|
#include "llvm/ADT/StringExtras.h"
|
2012-12-04 17:13:33 +08:00
|
|
|
#include "llvm/ADT/StringSet.h"
|
2013-07-19 04:29:38 +08:00
|
|
|
#include "llvm/ADT/StringSwitch.h"
|
2013-06-15 01:17:23 +08:00
|
|
|
#include "llvm/Option/Arg.h"
|
|
|
|
#include "llvm/Option/ArgList.h"
|
2014-01-07 19:51:46 +08:00
|
|
|
#include "llvm/Option/OptSpecifier.h"
|
2013-06-15 01:17:23 +08:00
|
|
|
#include "llvm/Option/OptTable.h"
|
|
|
|
#include "llvm/Option/Option.h"
|
2011-09-23 13:57:42 +08:00
|
|
|
#include "llvm/Support/ErrorHandling.h"
|
2010-12-18 05:22:22 +08:00
|
|
|
#include "llvm/Support/FileSystem.h"
|
2010-11-30 02:12:39 +08:00
|
|
|
#include "llvm/Support/Path.h"
|
2012-12-04 17:13:33 +08:00
|
|
|
#include "llvm/Support/PrettyStackTrace.h"
|
2014-06-19 01:21:50 +08:00
|
|
|
#include "llvm/Support/Process.h"
|
2010-11-30 02:12:39 +08:00
|
|
|
#include "llvm/Support/Program.h"
|
2012-12-04 17:13:33 +08:00
|
|
|
#include "llvm/Support/raw_ostream.h"
|
2012-02-02 08:40:14 +08:00
|
|
|
#include <map>
|
2014-03-09 19:36:40 +08:00
|
|
|
#include <memory>
|
2016-05-27 22:27:13 +08:00
|
|
|
#include <utility>
|
2012-02-01 22:25:28 +08:00
|
|
|
|
2009-03-05 04:49:20 +08:00
|
|
|
using namespace clang::driver;
|
2009-03-26 13:56:24 +08:00
|
|
|
using namespace clang;
|
2013-06-15 01:17:23 +08:00
|
|
|
using namespace llvm::opt;
|
2009-03-05 04:49:20 +08:00
|
|
|
|
2015-02-03 06:41:48 +08:00
|
|
|
/// Construct a driver for the given clang executable path.
///
/// \param ClangExecutable Path to the clang executable; its file name and
///        parent directory seed Name, Dir and InstalledDir.
/// \param DefaultTargetTriple Triple string stored as the driver's default.
/// \param Diags Diagnostics engine the driver reports through.
/// \param VFS File system to use; when null the real file system is used.
Driver::Driver(StringRef ClangExecutable, StringRef DefaultTargetTriple,
               DiagnosticsEngine &Diags,
               IntrusiveRefCntPtr<vfs::FileSystem> VFS)
    : Opts(createDriverOptTable()), Diags(Diags), VFS(std::move(VFS)),
      Mode(GCCMode), SaveTemps(SaveTempsNone), BitcodeEmbed(EmbedNone),
      LTOMode(LTOK_None), ClangExecutable(ClangExecutable),
      SysRoot(DEFAULT_SYSROOT), UseStdLib(true),
      DriverTitle("clang LLVM compiler"), CCPrintOptionsFilename(nullptr),
      CCPrintHeadersFilename(nullptr), CCLogDiagnosticsFilename(nullptr),
      CCCPrintBindings(false), CCPrintHeaders(false), CCLogDiagnostics(false),
      CCGenDiagnostics(false), DefaultTargetTriple(DefaultTargetTriple),
      CCCGenericGCCName(""), CheckInputsExist(true), CCCUsePCH(true),
      SuppressMissingInputWarning(false) {

  // The parameter VFS was moved into the member above; fall back to the real
  // file system when the caller did not supply one.
  if (!this->VFS)
    this->VFS = vfs::getRealFileSystem();

  Name = llvm::sys::path::filename(ClangExecutable);
  Dir = llvm::sys::path::parent_path(ClangExecutable);
  // Until told otherwise, assume clang is installed where the executable is.
  InstalledDir = Dir;

  // Derive the resource directory relative to the executable's directory:
  // either the configured CLANG_RESOURCE_DIR, or the conventional
  // ../lib<suffix>/clang/<version> layout when none is configured.
  SmallString<128> ResourcePath(Dir);
  const StringRef ConfiguredResourceDir(CLANG_RESOURCE_DIR);
  if (ConfiguredResourceDir.empty()) {
    const StringRef LibdirSuffix(CLANG_LIBDIR_SUFFIX);
    llvm::sys::path::append(ResourcePath, "..", Twine("lib") + LibdirSuffix,
                            "clang", CLANG_VERSION_STRING);
  } else {
    llvm::sys::path::append(ResourcePath, ConfiguredResourceDir);
  }
  ResourceDir = ResourcePath.str();
}
|
|
|
|
|
|
|
|
/// Release the option table and every tool chain stored in ToolChains.
Driver::~Driver() {
  delete Opts;

  // Destroy each cached tool chain, then drop the now-dangling entries.
  for (auto &Entry : ToolChains)
    delete Entry.second;
  ToolChains.clear();
}
|
|
|
|
|
2016-08-13 01:47:52 +08:00
|
|
|
/// Determine the driver mode from the program name and the raw arguments.
///
/// The mode implied by \p ProgramName is applied first; every argument in
/// \p Args is then offered to setDriverModeFromOption in order, so a later
/// matching option overrides an earlier one.
void Driver::ParseDriverMode(StringRef ProgramName,
                             ArrayRef<const char *> Args) {
  auto TargetAndMode = ToolChain::getTargetAndModeFromProgramName(ProgramName);
  setDriverModeFromOption(StringRef(TargetAndMode.second));

  for (const char *RawArg : Args) {
    // Null entries mark end-of-line in response files; there is nothing to
    // inspect for them.
    if (RawArg != nullptr)
      setDriverModeFromOption(StringRef(RawArg));
  }
}
|
|
|
|
|
2016-08-13 01:47:52 +08:00
|
|
|
/// Update Mode if \p Opt is a driver-mode option.
///
/// Strings that do not start with the prefixed name of the driver-mode option
/// are ignored. A recognized value ("gcc", "g++", "cpp", "cl") sets Mode; any
/// other value is reported as an unsupported option argument.
void Driver::setDriverModeFromOption(StringRef Opt) {
  const std::string Prefix =
      getOpts().getOption(options::OPT_driver_mode).getPrefixedName();
  if (!Opt.startswith(Prefix))
    return;

  const StringRef Requested = Opt.drop_front(Prefix.size());

  // Sentinel meaning "no known mode matched".
  const unsigned Unrecognized = ~0U;
  const unsigned Selected = llvm::StringSwitch<unsigned>(Requested)
                                .Case("gcc", GCCMode)
                                .Case("g++", GXXMode)
                                .Case("cpp", CPPMode)
                                .Case("cl", CLMode)
                                .Default(Unrecognized);

  if (Selected == Unrecognized) {
    Diag(diag::err_drv_unsupported_option_argument) << Prefix << Requested;
    return;
  }

  Mode = static_cast<DriverMode>(Selected);
}
|
|
|
|
|
2015-06-23 06:07:27 +08:00
|
|
|
/// Parse the raw argument strings into an InputArgList, emitting diagnostics
/// for missing option arguments, unsupported options, an empty -mcpu= value,
/// and unknown arguments.
InputArgList Driver::ParseArgStrings(ArrayRef<const char *> ArgStrings) {
  llvm::PrettyStackTraceString CrashInfo("Command line argument parsing");

  // Flag masks deciding which options are visible to the parser.
  unsigned IncludeMask;
  unsigned ExcludeMask;
  std::tie(IncludeMask, ExcludeMask) = getIncludeExcludeOptionFlagMasks();

  unsigned FirstMissingIndex, NumMissing;
  InputArgList Parsed = getOpts().ParseArgs(
      ArgStrings, FirstMissingIndex, NumMissing, IncludeMask, ExcludeMask);

  // Report an option that was left without its required argument(s).
  if (NumMissing != 0)
    Diag(clang::diag::err_drv_missing_argument)
        << Parsed.getArgString(FirstMissingIndex) << NumMissing;

  for (const Arg *Current : Parsed) {
    if (Current->getOption().hasFlag(options::Unsupported)) {
      // Unsupported options get an error and no further checks.
      Diag(clang::diag::err_drv_unsupported_opt)
          << Current->getAsString(Parsed);
    } else if (Current->getOption().matches(options::OPT_mcpu_EQ) &&
               Current->containsValue("")) {
      // Warn about -mcpu= without an argument.
      Diag(clang::diag::warn_drv_empty_joined_argument)
          << Current->getAsString(Parsed);
    }
  }

  // Unknown arguments are only a warning in CL mode, an error otherwise.
  for (const Arg *Unknown : Parsed.filtered(options::OPT_UNKNOWN)) {
    const unsigned DiagID = IsCLMode()
                                ? diag::warn_drv_unknown_argument_clang_cl
                                : diag::err_drv_unknown_argument;
    Diags.Report(DiagID) << Unknown->getAsString(Parsed);
  }

  return Parsed;
}
|
|
|
|
|
2011-07-28 07:36:45 +08:00
|
|
|
// Determine which compilation mode we are in. We look for options which
|
|
|
|
// affect the phase, starting with the earliest phases, and record which
|
|
|
|
// option we used to determine the final phase.
|
2015-06-26 23:47:46 +08:00
|
|
|
phases::ID Driver::getFinalPhase(const DerivedArgList &DAL,
|
|
|
|
Arg **FinalPhaseArg) const {
|
2014-05-18 00:56:41 +08:00
|
|
|
Arg *PhaseArg = nullptr;
|
2011-07-28 07:36:45 +08:00
|
|
|
phases::ID FinalPhase;
|
2011-08-18 06:59:59 +08:00
|
|
|
|
2014-06-14 04:59:54 +08:00
|
|
|
// -{E,EP,P,M,MM} only run the preprocessor.
|
2015-06-26 23:47:46 +08:00
|
|
|
if (CCCIsCPP() || (PhaseArg = DAL.getLastArg(options::OPT_E)) ||
|
2014-06-14 04:59:54 +08:00
|
|
|
(PhaseArg = DAL.getLastArg(options::OPT__SLASH_EP)) ||
|
2013-12-21 02:40:46 +08:00
|
|
|
(PhaseArg = DAL.getLastArg(options::OPT_M, options::OPT_MM)) ||
|
|
|
|
(PhaseArg = DAL.getLastArg(options::OPT__SLASH_P))) {
|
2011-07-28 07:36:45 +08:00
|
|
|
FinalPhase = phases::Preprocess;
|
2011-08-18 06:59:59 +08:00
|
|
|
|
Unrevert r280035 now that the clang-cl bug it exposed has been fixed by
r280133. Original commit message:
C++ Modules TS: driver support for building modules.
This works as follows: we add --precompile to the existing gamut of options for
specifying how far to go when compiling an input (-E, -c, -S, etc.). This flag
specifies that an input is taken to the precompilation step and no further, and
this can be specified when building a .pcm from a module interface or when
building a .pch from a header file.
The .cppm extension (and some related extensions) are implicitly recognized as
C++ module interface files. If --precompile is /not/ specified, the file is
compiled (via a .pcm) to a .o file containing the code for the module (and then
potentially also assembled and linked, if -S, -c, etc. are not specified). We
do not yet suppress the emission of object code for other users of the module
interface, so for now this will only work if everything in the .cppm file has
vague linkage.
As with the existing support for module-map modules, prebuilt modules can be
provided as compiler inputs either via the -fmodule-file= command-line argument
or via files named ModuleName.pcm in one of the directories specified via
-fprebuilt-module-path=.
This also exposes the -fmodules-ts cc1 flag in the driver. This is still
experimental, and in particular, the concrete syntax is subject to change as
the Modules TS evolves in the C++ committee. Unlike -fmodules, this flag does
not enable support for implicitly loading module maps nor building modules via
the module cache, but those features can be turned on separately and used in
conjunction with the Modules TS support.
llvm-svn: 280134
2016-08-31 03:06:26 +08:00
|
|
|
// --precompile only runs up to precompilation.
|
|
|
|
} else if ((PhaseArg = DAL.getLastArg(options::OPT__precompile))) {
|
|
|
|
FinalPhase = phases::Precompile;
|
|
|
|
|
Reapply "Change -save-temps to emit unoptimized bitcode files."
This reapplies r224503 along with a fix for compiling Fortran by having the
clang driver invoke gcc (see r224546, where it was reverted). I have added
a testcase for that as well.
Original commit message:
It is often convenient to use -save-temps to collect the intermediate
results of a compilation, e.g., when triaging a bug report. Besides the
temporary files for preprocessed source and assembly code, this adds the
unoptimized bitcode files as well.
This adds a new BackendJobAction, which is mostly mechanical, to run after
the CompileJobAction. When not using -save-temps, the BackendJobAction is
combined into one job with the CompileJobAction, similar to the way the
integrated assembler is handled. I've implemented this entirely as a
driver change, so under the hood, it is just using -disable-llvm-optzns
to get the unoptimized bitcode.
Based in part on a patch by Steven Wu.
rdar://problem/18909437
llvm-svn: 224688
2014-12-21 15:00:00 +08:00
|
|
|
// -{fsyntax-only,-analyze,emit-ast} only run up to the compiler.
|
2011-07-28 07:36:45 +08:00
|
|
|
} else if ((PhaseArg = DAL.getLastArg(options::OPT_fsyntax_only)) ||
|
2013-03-28 00:47:18 +08:00
|
|
|
(PhaseArg = DAL.getLastArg(options::OPT_module_file_info)) ||
|
2014-02-06 06:21:15 +08:00
|
|
|
(PhaseArg = DAL.getLastArg(options::OPT_verify_pch)) ||
|
2011-07-28 07:36:45 +08:00
|
|
|
(PhaseArg = DAL.getLastArg(options::OPT_rewrite_objc)) ||
|
2012-04-02 23:59:19 +08:00
|
|
|
(PhaseArg = DAL.getLastArg(options::OPT_rewrite_legacy_objc)) ||
|
2012-03-07 04:06:33 +08:00
|
|
|
(PhaseArg = DAL.getLastArg(options::OPT__migrate)) ||
|
2011-07-28 07:36:45 +08:00
|
|
|
(PhaseArg = DAL.getLastArg(options::OPT__analyze,
|
2012-03-07 07:14:35 +08:00
|
|
|
options::OPT__analyze_auto)) ||
|
Reapply "Change -save-temps to emit unoptimized bitcode files."
This reapplies r224503 along with a fix for compiling Fortran by having the
clang driver invoke gcc (see r224546, where it was reverted). I have added
a testcase for that as well.
Original commit message:
It is often convenient to use -save-temps to collect the intermediate
results of a compilation, e.g., when triaging a bug report. Besides the
temporary files for preprocessed source and assembly code, this adds the
unoptimized bitcode files as well.
This adds a new BackendJobAction, which is mostly mechanical, to run after
the CompileJobAction. When not using -save-temps, the BackendJobAction is
combined into one job with the CompileJobAction, similar to the way the
integrated assembler is handled. I've implemented this entirely as a
driver change, so under the hood, it is just using -disable-llvm-optzns
to get the unoptimized bitcode.
Based in part on a patch by Steven Wu.
rdar://problem/18909437
llvm-svn: 224688
2014-12-21 15:00:00 +08:00
|
|
|
(PhaseArg = DAL.getLastArg(options::OPT_emit_ast))) {
|
2011-07-28 07:36:45 +08:00
|
|
|
FinalPhase = phases::Compile;
|
|
|
|
|
Reapply "Change -save-temps to emit unoptimized bitcode files."
This reapplies r224503 along with a fix for compiling Fortran by having the
clang driver invoke gcc (see r224546, where it was reverted). I have added
a testcase for that as well.
Original commit message:
It is often convenient to use -save-temps to collect the intermediate
results of a compilation, e.g., when triaging a bug report. Besides the
temporary files for preprocessed source and assembly code, this adds the
unoptimized bitcode files as well.
This adds a new BackendJobAction, which is mostly mechanical, to run after
the CompileJobAction. When not using -save-temps, the BackendJobAction is
combined into one job with the CompileJobAction, similar to the way the
integrated assembler is handled. I've implemented this entirely as a
driver change, so under the hood, it is just using -disable-llvm-optzns
to get the unoptimized bitcode.
Based in part on a patch by Steven Wu.
rdar://problem/18909437
llvm-svn: 224688
2014-12-21 15:00:00 +08:00
|
|
|
// -S only runs up to the backend.
|
|
|
|
} else if ((PhaseArg = DAL.getLastArg(options::OPT_S))) {
|
|
|
|
FinalPhase = phases::Backend;
|
|
|
|
|
2015-07-29 05:01:21 +08:00
|
|
|
// -c compilation only runs up to the assembler.
|
|
|
|
} else if ((PhaseArg = DAL.getLastArg(options::OPT_c))) {
|
2011-07-28 07:36:45 +08:00
|
|
|
FinalPhase = phases::Assemble;
|
|
|
|
|
|
|
|
// Otherwise do everything.
|
|
|
|
} else
|
|
|
|
FinalPhase = phases::Link;
|
|
|
|
|
|
|
|
if (FinalPhaseArg)
|
|
|
|
*FinalPhaseArg = PhaseArg;
|
|
|
|
|
|
|
|
return FinalPhase;
|
|
|
|
}
|
|
|
|
|
2015-06-26 23:47:46 +08:00
|
|
|
/// Create an already-claimed OPT_INPUT argument for \p Value and register it
/// with \p Args as a synthesized argument.
///
/// \returns the new argument. NOTE(review): ownership presumably passes to
/// \p Args via AddSynthesizedArg -- confirm against DerivedArgList.
static Arg *MakeInputArg(DerivedArgList &Args, OptTable *Opts,
                         StringRef Value) {
  // Reserve an index for the value in the underlying (base) argument list.
  const unsigned Index = Args.getBaseArgs().MakeIndex(Value);

  Arg *Input = new Arg(Opts->getOption(options::OPT_INPUT), Value, Index,
                       Value.data());
  Args.AddSynthesizedArg(Input);
  Input->claim();
  return Input;
}
|
|
|
|
|
2010-06-12 06:00:26 +08:00
|
|
|
DerivedArgList *Driver::TranslateInputArgs(const InputArgList &Args) const {
|
|
|
|
DerivedArgList *DAL = new DerivedArgList(Args);
|
|
|
|
|
2010-09-17 08:45:02 +08:00
|
|
|
bool HasNostdlib = Args.hasArg(options::OPT_nostdlib);
|
2015-11-25 00:07:21 +08:00
|
|
|
bool HasNodefaultlib = Args.hasArg(options::OPT_nodefaultlibs);
|
2014-12-30 03:01:36 +08:00
|
|
|
for (Arg *A : Args) {
|
2010-06-15 05:23:12 +08:00
|
|
|
// Unfortunately, we have to parse some forwarding options (-Xassembler,
|
|
|
|
// -Xlinker, -Xpreprocessor) because we either integrate their functionality
|
|
|
|
// (assembler and preprocessor), or bypass a previous driver ('collect2').
|
2010-06-15 05:37:09 +08:00
|
|
|
|
|
|
|
// Rewrite linker options, to replace --no-demangle with a custom internal
|
|
|
|
// option.
|
|
|
|
if ((A->getOption().matches(options::OPT_Wl_COMMA) ||
|
|
|
|
A->getOption().matches(options::OPT_Xlinker)) &&
|
|
|
|
A->containsValue("--no-demangle")) {
|
2010-06-15 05:23:12 +08:00
|
|
|
// Add the rewritten no-demangle argument.
|
|
|
|
DAL->AddFlagArg(A, Opts->getOption(options::OPT_Z_Xlinker__no_demangle));
|
|
|
|
|
|
|
|
// Add the remaining values as Xlinker arguments.
|
2015-09-24 22:48:49 +08:00
|
|
|
for (StringRef Val : A->getValues())
|
2015-07-01 03:32:57 +08:00
|
|
|
if (Val != "--no-demangle")
|
|
|
|
DAL->AddSeparateArg(A, Opts->getOption(options::OPT_Xlinker), Val);
|
2010-06-15 05:23:12 +08:00
|
|
|
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2010-06-15 05:37:09 +08:00
|
|
|
// Rewrite preprocessor options, to replace -Wp,-MD,FOO which is used by
|
|
|
|
// some build systems. We don't try to be complete here because we don't
|
|
|
|
// care to encourage this usage model.
|
|
|
|
if (A->getOption().matches(options::OPT_Wp_COMMA) &&
|
2012-11-01 12:30:05 +08:00
|
|
|
(A->getValue(0) == StringRef("-MD") ||
|
|
|
|
A->getValue(0) == StringRef("-MMD"))) {
|
2010-06-16 04:30:18 +08:00
|
|
|
// Rewrite to -MD/-MMD along with -MF.
|
2012-11-01 12:30:05 +08:00
|
|
|
if (A->getValue(0) == StringRef("-MD"))
|
2010-06-16 04:30:18 +08:00
|
|
|
DAL->AddFlagArg(A, Opts->getOption(options::OPT_MD));
|
|
|
|
else
|
|
|
|
DAL->AddFlagArg(A, Opts->getOption(options::OPT_MMD));
|
2012-11-08 07:37:14 +08:00
|
|
|
if (A->getNumValues() == 2)
|
|
|
|
DAL->AddSeparateArg(A, Opts->getOption(options::OPT_MF),
|
|
|
|
A->getValue(1));
|
2010-06-15 05:37:09 +08:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2010-09-18 02:39:08 +08:00
|
|
|
// Rewrite reserved library names.
|
|
|
|
if (A->getOption().matches(options::OPT_l)) {
|
2012-11-01 12:30:05 +08:00
|
|
|
StringRef Value = A->getValue();
|
2010-09-17 08:45:02 +08:00
|
|
|
|
2010-09-18 02:39:08 +08:00
|
|
|
// Rewrite unless -nostdlib is present.
|
2015-11-25 00:07:21 +08:00
|
|
|
if (!HasNostdlib && !HasNodefaultlib && Value == "stdc++") {
|
2015-06-26 23:47:46 +08:00
|
|
|
DAL->AddFlagArg(A, Opts->getOption(options::OPT_Z_reserved_lib_stdcxx));
|
2010-09-17 08:45:02 +08:00
|
|
|
continue;
|
|
|
|
}
|
2010-09-18 02:39:08 +08:00
|
|
|
|
|
|
|
// Rewrite unconditionally.
|
|
|
|
if (Value == "cc_kext") {
|
2015-06-26 23:47:46 +08:00
|
|
|
DAL->AddFlagArg(A, Opts->getOption(options::OPT_Z_reserved_lib_cckext));
|
2010-09-18 02:39:08 +08:00
|
|
|
continue;
|
|
|
|
}
|
2010-09-17 08:45:02 +08:00
|
|
|
}
|
|
|
|
|
2013-08-14 05:32:29 +08:00
|
|
|
// Pick up inputs via the -- option.
|
|
|
|
if (A->getOption().matches(options::OPT__DASH_DASH)) {
|
|
|
|
A->claim();
|
2015-09-24 22:48:49 +08:00
|
|
|
for (StringRef Val : A->getValues())
|
2015-07-01 03:32:57 +08:00
|
|
|
DAL->append(MakeInputArg(*DAL, Opts, Val));
|
2013-08-14 05:32:29 +08:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2014-12-30 03:01:36 +08:00
|
|
|
DAL->append(A);
|
2010-06-15 05:23:12 +08:00
|
|
|
}
|
2010-06-12 06:00:26 +08:00
|
|
|
|
2016-04-21 18:16:48 +08:00
|
|
|
// Enforce -static if -miamcu is present.
|
2016-06-29 18:57:17 +08:00
|
|
|
if (Args.hasFlag(options::OPT_miamcu, options::OPT_mno_iamcu, false))
|
|
|
|
DAL->AddFlagArg(0, Opts->getOption(options::OPT_static));
|
2016-04-21 18:16:48 +08:00
|
|
|
|
2015-06-26 23:47:46 +08:00
|
|
|
// Add a default value of -mlinker-version=, if one was given and the user
|
|
|
|
// didn't specify one.
|
2010-08-12 08:05:12 +08:00
|
|
|
#if defined(HOST_LINK_VERSION)
|
2015-06-13 03:21:35 +08:00
|
|
|
if (!Args.hasArg(options::OPT_mlinker_version_EQ) &&
|
|
|
|
strlen(HOST_LINK_VERSION) > 0) {
|
2010-08-12 08:05:12 +08:00
|
|
|
DAL->AddJoinedArg(0, Opts->getOption(options::OPT_mlinker_version_EQ),
|
|
|
|
HOST_LINK_VERSION);
|
2010-08-18 06:32:45 +08:00
|
|
|
DAL->getLastArg(options::OPT_mlinker_version_EQ)->claim();
|
2010-08-12 08:05:12 +08:00
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
2010-06-12 06:00:26 +08:00
|
|
|
return DAL;
|
|
|
|
}
|
|
|
|
|
2015-07-11 03:47:55 +08:00
|
|
|
/// \brief Compute target triple from args.
|
|
|
|
///
|
|
|
|
/// This routine provides the logic to compute a target triple from various
|
|
|
|
/// args passed to the driver and the default triple string.
|
2016-04-21 18:16:48 +08:00
|
|
|
static llvm::Triple computeTargetTriple(const Driver &D,
|
|
|
|
StringRef DefaultTargetTriple,
|
2015-07-11 03:47:55 +08:00
|
|
|
const ArgList &Args,
|
|
|
|
StringRef DarwinArchName = "") {
|
|
|
|
// FIXME: Already done in Compilation *Driver::BuildCompilation
|
|
|
|
if (const Arg *A = Args.getLastArg(options::OPT_target))
|
|
|
|
DefaultTargetTriple = A->getValue();
|
|
|
|
|
|
|
|
llvm::Triple Target(llvm::Triple::normalize(DefaultTargetTriple));
|
|
|
|
|
|
|
|
// Handle Apple-specific options available here.
|
|
|
|
if (Target.isOSBinFormatMachO()) {
|
|
|
|
// If an explict Darwin arch name is given, that trumps all.
|
|
|
|
if (!DarwinArchName.empty()) {
|
|
|
|
tools::darwin::setTripleTypeForMachOArchName(Target, DarwinArchName);
|
|
|
|
return Target;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Handle the Darwin '-arch' flag.
|
|
|
|
if (Arg *A = Args.getLastArg(options::OPT_arch)) {
|
|
|
|
StringRef ArchName = A->getValue();
|
|
|
|
tools::darwin::setTripleTypeForMachOArchName(Target, ArchName);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Handle pseudo-target flags '-mlittle-endian'/'-EL' and
|
|
|
|
// '-mbig-endian'/'-EB'.
|
|
|
|
if (Arg *A = Args.getLastArg(options::OPT_mlittle_endian,
|
|
|
|
options::OPT_mbig_endian)) {
|
|
|
|
if (A->getOption().matches(options::OPT_mlittle_endian)) {
|
|
|
|
llvm::Triple LE = Target.getLittleEndianArchVariant();
|
|
|
|
if (LE.getArch() != llvm::Triple::UnknownArch)
|
|
|
|
Target = std::move(LE);
|
|
|
|
} else {
|
|
|
|
llvm::Triple BE = Target.getBigEndianArchVariant();
|
|
|
|
if (BE.getArch() != llvm::Triple::UnknownArch)
|
|
|
|
Target = std::move(BE);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Skip further flag support on OSes which don't support '-m32' or '-m64'.
|
2015-08-13 02:36:12 +08:00
|
|
|
if (Target.getArch() == llvm::Triple::tce ||
|
|
|
|
Target.getOS() == llvm::Triple::Minix)
|
2015-07-11 03:47:55 +08:00
|
|
|
return Target;
|
|
|
|
|
|
|
|
// Handle pseudo-target flags '-m64', '-mx32', '-m32' and '-m16'.
|
2016-04-21 18:16:48 +08:00
|
|
|
Arg *A = Args.getLastArg(options::OPT_m64, options::OPT_mx32,
|
|
|
|
options::OPT_m32, options::OPT_m16);
|
|
|
|
if (A) {
|
2015-07-11 03:47:55 +08:00
|
|
|
llvm::Triple::ArchType AT = llvm::Triple::UnknownArch;
|
|
|
|
|
|
|
|
if (A->getOption().matches(options::OPT_m64)) {
|
|
|
|
AT = Target.get64BitArchVariant().getArch();
|
|
|
|
if (Target.getEnvironment() == llvm::Triple::GNUX32)
|
|
|
|
Target.setEnvironment(llvm::Triple::GNU);
|
|
|
|
} else if (A->getOption().matches(options::OPT_mx32) &&
|
|
|
|
Target.get64BitArchVariant().getArch() == llvm::Triple::x86_64) {
|
|
|
|
AT = llvm::Triple::x86_64;
|
|
|
|
Target.setEnvironment(llvm::Triple::GNUX32);
|
|
|
|
} else if (A->getOption().matches(options::OPT_m32)) {
|
|
|
|
AT = Target.get32BitArchVariant().getArch();
|
|
|
|
if (Target.getEnvironment() == llvm::Triple::GNUX32)
|
|
|
|
Target.setEnvironment(llvm::Triple::GNU);
|
|
|
|
} else if (A->getOption().matches(options::OPT_m16) &&
|
|
|
|
Target.get32BitArchVariant().getArch() == llvm::Triple::x86) {
|
|
|
|
AT = llvm::Triple::x86;
|
|
|
|
Target.setEnvironment(llvm::Triple::CODE16);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (AT != llvm::Triple::UnknownArch && AT != Target.getArch())
|
|
|
|
Target.setArch(AT);
|
|
|
|
}
|
|
|
|
|
2016-04-21 18:16:48 +08:00
|
|
|
// Handle -miamcu flag.
|
2016-06-29 18:57:17 +08:00
|
|
|
if (Args.hasFlag(options::OPT_miamcu, options::OPT_mno_iamcu, false)) {
|
|
|
|
if (Target.get32BitArchVariant().getArch() != llvm::Triple::x86)
|
|
|
|
D.Diag(diag::err_drv_unsupported_opt_for_target) << "-miamcu"
|
|
|
|
<< Target.str();
|
|
|
|
|
|
|
|
if (A && !A->getOption().matches(options::OPT_m32))
|
|
|
|
D.Diag(diag::err_drv_argument_not_allowed_with)
|
|
|
|
<< "-miamcu" << A->getBaseArg().getAsString(Args);
|
|
|
|
|
|
|
|
Target.setArch(llvm::Triple::x86);
|
|
|
|
Target.setArchName("i586");
|
|
|
|
Target.setEnvironment(llvm::Triple::UnknownEnvironment);
|
|
|
|
Target.setEnvironmentName("");
|
|
|
|
Target.setOS(llvm::Triple::ELFIAMCU);
|
|
|
|
Target.setVendor(llvm::Triple::UnknownVendor);
|
|
|
|
Target.setVendorName("intel");
|
2016-04-21 18:16:48 +08:00
|
|
|
}
|
|
|
|
|
2015-07-11 03:47:55 +08:00
|
|
|
return Target;
|
|
|
|
}
|
|
|
|
|
2015-10-16 04:35:53 +08:00
|
|
|
// \brief Parse the LTO options and record the type of LTO compilation
|
|
|
|
// based on which -f(no-)?lto(=.*)? option occurs last.
|
|
|
|
void Driver::setLTOMode(const llvm::opt::ArgList &Args) {
|
|
|
|
LTOMode = LTOK_None;
|
|
|
|
if (!Args.hasFlag(options::OPT_flto, options::OPT_flto_EQ,
|
|
|
|
options::OPT_fno_lto, false))
|
|
|
|
return;
|
|
|
|
|
|
|
|
StringRef LTOName("full");
|
|
|
|
|
|
|
|
const Arg *A = Args.getLastArg(options::OPT_flto_EQ);
|
2015-11-03 02:03:12 +08:00
|
|
|
if (A)
|
|
|
|
LTOName = A->getValue();
|
2015-10-16 04:35:53 +08:00
|
|
|
|
|
|
|
LTOMode = llvm::StringSwitch<LTOKind>(LTOName)
|
|
|
|
.Case("full", LTOK_Full)
|
|
|
|
.Case("thin", LTOK_Thin)
|
|
|
|
.Default(LTOK_Unknown);
|
|
|
|
|
|
|
|
if (LTOMode == LTOK_Unknown) {
|
|
|
|
assert(A);
|
|
|
|
Diag(diag::err_drv_unsupported_option_argument) << A->getOption().getName()
|
|
|
|
<< A->getValue();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
[CUDA][OpenMP] Create generic offload toolchains
Summary:
This patch introduces the concept of offloading tool chain and offloading kind. Each tool chain may have associated an offloading kind that marks it as used in a given programming model that requires offloading.
It also adds the logic to iterate on the tool chains based on the kind. Currently, only CUDA is supported, but in general a programming model (an offloading kind) may have associated multiple tool chains that require supporting offloading.
This patch does not add tests - its goal is to keep the existing functionality.
This patch is the first of a series of three that attempts to make the current support of CUDA more generic and easier to extend to other programming models, namely OpenMP. It tries to capture the suggestions/improvements/concerns on the initial proposal in http://lists.llvm.org/pipermail/cfe-dev/2016-February/047547.html. It only tackles the more consensual part of the proposal, i.e.does not address the problem of intermediate files bundling yet.
Reviewers: ABataev, jlebar, echristo, hfinkel, tra
Subscribers: guansong, Hahnfeld, andreybokhanko, tcramer, mkuron, cfe-commits, arpith-jacob, carlo.bertolli, caomhin
Differential Revision: http://reviews.llvm.org/D18170
llvm-svn: 272571
2016-06-14 02:10:57 +08:00
|
|
|
/// Register the device-side toolchains needed for offloading with \p C.
///
/// Only CUDA is handled: when at least one entry of \p Inputs has a CUDA
/// type, an NVPTX toolchain is created and attached to the compilation under
/// the Action::OFK_Cuda offload kind.
void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
                                              InputList &Inputs) {
  //
  // CUDA
  //
  // A CUDA toolchain is required as soon as one input has a CUDA type.
  bool HasCudaInput = false;
  for (const auto &Input : Inputs) {
    if (types::isCuda(Input.first)) {
      HasCudaInput = true;
      break;
    }
  }

  if (HasCudaInput) {
    // The device triple follows the pointer width of the host toolchain.
    const ToolChain *HostTC = C.getSingleOffloadToolChain<Action::OFK_Host>();
    const char *DeviceTripleName = HostTC->getTriple().isArch64Bit()
                                       ? "nvptx64-nvidia-cuda"
                                       : "nvptx-nvidia-cuda";
    const ToolChain &DeviceTC =
        getToolChain(C.getInputArgs(), llvm::Triple(DeviceTripleName));
    C.addOffloadDeviceToolChain(&DeviceTC, Action::OFK_Cuda);
  }

  //
  // TODO: Add support for other offloading programming models here.
  //
}
|
|
|
|
|
2011-07-24 01:14:25 +08:00
|
|
|
Compilation *Driver::BuildCompilation(ArrayRef<const char *> ArgList) {
|
2009-03-18 09:38:48 +08:00
|
|
|
llvm::PrettyStackTraceString CrashInfo("Compilation construction");
|
|
|
|
|
2011-08-18 06:59:59 +08:00
|
|
|
// FIXME: Handle environment options which affect driver behavior, somewhere
|
2012-03-13 05:24:57 +08:00
|
|
|
// (client?). GCC_EXEC_PREFIX, LPATH, CC_PRINT_OPTIONS.
|
2011-09-14 08:47:55 +08:00
|
|
|
|
2016-07-25 01:44:03 +08:00
|
|
|
if (Optional<std::string> CompilerPathValue =
|
|
|
|
llvm::sys::Process::GetEnv("COMPILER_PATH")) {
|
|
|
|
StringRef CompilerPath = *CompilerPathValue;
|
2011-09-14 08:47:55 +08:00
|
|
|
while (!CompilerPath.empty()) {
|
2015-06-26 23:47:46 +08:00
|
|
|
std::pair<StringRef, StringRef> Split =
|
|
|
|
CompilerPath.split(llvm::sys::EnvPathSeparator);
|
2011-09-14 08:47:55 +08:00
|
|
|
PrefixDirs.push_back(Split.first);
|
|
|
|
CompilerPath = Split.second;
|
|
|
|
}
|
|
|
|
}
|
2009-03-13 08:51:18 +08:00
|
|
|
|
2013-07-19 04:29:38 +08:00
|
|
|
// We look for the driver mode option early, because the mode can affect
|
|
|
|
// how other options are parsed.
|
2016-08-13 01:47:52 +08:00
|
|
|
ParseDriverMode(ClangExecutable, ArgList.slice(1));
|
2013-07-19 04:29:38 +08:00
|
|
|
|
2009-03-13 08:51:18 +08:00
|
|
|
// FIXME: What are we going to do with -V and -b?
|
|
|
|
|
2009-09-09 07:36:43 +08:00
|
|
|
// FIXME: This stuff needs to go into the Compilation, not the driver.
|
2015-06-26 03:37:41 +08:00
|
|
|
bool CCCPrintPhases;
|
2009-03-05 14:38:47 +08:00
|
|
|
|
2015-06-23 06:07:27 +08:00
|
|
|
InputArgList Args = ParseArgStrings(ArgList.slice(1));
|
2009-12-05 05:55:23 +08:00
|
|
|
|
2015-07-18 14:35:24 +08:00
|
|
|
// Silence driver warnings if requested
|
|
|
|
Diags.setIgnoreAllWarnings(Args.hasArg(options::OPT_w));
|
|
|
|
|
2009-12-08 02:28:29 +08:00
|
|
|
// -no-canonical-prefixes is used very early in main.
|
2015-06-23 06:07:27 +08:00
|
|
|
Args.ClaimAllArgs(options::OPT_no_canonical_prefixes);
|
2009-12-08 02:28:29 +08:00
|
|
|
|
2010-08-02 10:38:03 +08:00
|
|
|
// Ignore -pipe.
|
2015-06-23 06:07:27 +08:00
|
|
|
Args.ClaimAllArgs(options::OPT_pipe);
|
2010-08-02 10:38:03 +08:00
|
|
|
|
2009-12-05 05:55:23 +08:00
|
|
|
// Extract -ccc args.
|
2009-03-11 04:52:46 +08:00
|
|
|
//
|
2009-09-09 07:36:43 +08:00
|
|
|
// FIXME: We need to figure out where this behavior should live. Most of it
|
|
|
|
// should be outside in the client; the parts that aren't should have proper
|
|
|
|
// options, either by introducing new ones or by overloading gcc ones like -V
|
|
|
|
// or -b.
|
2015-06-26 03:37:41 +08:00
|
|
|
CCCPrintPhases = Args.hasArg(options::OPT_ccc_print_phases);
|
2015-06-23 06:07:27 +08:00
|
|
|
CCCPrintBindings = Args.hasArg(options::OPT_ccc_print_bindings);
|
|
|
|
if (const Arg *A = Args.getLastArg(options::OPT_ccc_gcc_name))
|
2012-11-01 12:30:05 +08:00
|
|
|
CCCGenericGCCName = A->getValue();
|
2015-06-23 06:07:27 +08:00
|
|
|
CCCUsePCH =
|
|
|
|
Args.hasFlag(options::OPT_ccc_pch_is_pch, options::OPT_ccc_pch_is_pth);
|
2012-02-23 03:15:16 +08:00
|
|
|
// FIXME: DefaultTargetTriple is used by the target-prefixed calls to as/ld
|
|
|
|
// and getToolChain is const.
|
2013-08-14 07:38:57 +08:00
|
|
|
if (IsCLMode()) {
|
2014-03-28 09:19:04 +08:00
|
|
|
// clang-cl targets MSVC-style Win32.
|
2013-08-14 07:38:57 +08:00
|
|
|
llvm::Triple T(DefaultTargetTriple);
|
2014-03-29 04:49:28 +08:00
|
|
|
T.setOS(llvm::Triple::Win32);
|
2015-09-19 01:11:50 +08:00
|
|
|
T.setVendor(llvm::Triple::PC);
|
2014-03-29 04:49:28 +08:00
|
|
|
T.setEnvironment(llvm::Triple::MSVC);
|
2013-08-14 07:38:57 +08:00
|
|
|
DefaultTargetTriple = T.str();
|
|
|
|
}
|
2015-06-23 06:07:27 +08:00
|
|
|
if (const Arg *A = Args.getLastArg(options::OPT_target))
|
2012-11-01 12:30:05 +08:00
|
|
|
DefaultTargetTriple = A->getValue();
|
2015-06-23 06:07:27 +08:00
|
|
|
if (const Arg *A = Args.getLastArg(options::OPT_ccc_install_dir))
|
2012-11-01 12:30:05 +08:00
|
|
|
Dir = InstalledDir = A->getValue();
|
2015-06-23 06:07:27 +08:00
|
|
|
for (const Arg *A : Args.filtered(options::OPT_B)) {
|
2011-02-09 04:31:42 +08:00
|
|
|
A->claim();
|
2012-11-01 12:30:05 +08:00
|
|
|
PrefixDirs.push_back(A->getValue(0));
|
2011-02-09 04:31:42 +08:00
|
|
|
}
|
2015-06-23 06:07:27 +08:00
|
|
|
if (const Arg *A = Args.getLastArg(options::OPT__sysroot_EQ))
|
2012-11-01 12:30:05 +08:00
|
|
|
SysRoot = A->getValue();
|
2015-06-23 06:07:27 +08:00
|
|
|
if (const Arg *A = Args.getLastArg(options::OPT__dyld_prefix_EQ))
|
2013-05-28 05:40:20 +08:00
|
|
|
DyldPrefix = A->getValue();
|
2015-06-23 06:07:27 +08:00
|
|
|
if (Args.hasArg(options::OPT_nostdlib))
|
2011-03-21 21:59:26 +08:00
|
|
|
UseStdLib = false;
|
2009-03-11 04:52:46 +08:00
|
|
|
|
2015-06-23 06:07:27 +08:00
|
|
|
if (const Arg *A = Args.getLastArg(options::OPT_resource_dir))
|
2013-03-23 13:17:59 +08:00
|
|
|
ResourceDir = A->getValue();
|
2013-03-13 04:17:58 +08:00
|
|
|
|
2015-06-23 06:07:27 +08:00
|
|
|
if (const Arg *A = Args.getLastArg(options::OPT_save_temps_EQ)) {
|
2015-02-03 06:41:48 +08:00
|
|
|
SaveTemps = llvm::StringSwitch<SaveTempsMode>(A->getValue())
|
|
|
|
.Case("cwd", SaveTempsCwd)
|
|
|
|
.Case("obj", SaveTempsObj)
|
|
|
|
.Default(SaveTempsCwd);
|
|
|
|
}
|
|
|
|
|
2016-05-19 01:04:52 +08:00
|
|
|
setLTOMode(Args);
|
|
|
|
|
2016-03-01 09:07:58 +08:00
|
|
|
// Ignore -fembed-bitcode options with LTO
|
|
|
|
// since the output will be bitcode anyway.
|
2016-05-19 01:04:52 +08:00
|
|
|
if (getLTOMode() == LTOK_None) {
|
2016-05-12 00:26:03 +08:00
|
|
|
if (Arg *A = Args.getLastArg(options::OPT_fembed_bitcode_EQ)) {
|
|
|
|
StringRef Name = A->getValue();
|
|
|
|
unsigned Model = llvm::StringSwitch<unsigned>(Name)
|
|
|
|
.Case("off", EmbedNone)
|
|
|
|
.Case("all", EmbedBitcode)
|
|
|
|
.Case("bitcode", EmbedBitcode)
|
|
|
|
.Case("marker", EmbedMarker)
|
|
|
|
.Default(~0U);
|
|
|
|
if (Model == ~0U) {
|
|
|
|
Diags.Report(diag::err_drv_invalid_value) << A->getAsString(Args)
|
|
|
|
<< Name;
|
|
|
|
} else
|
|
|
|
BitcodeEmbed = static_cast<BitcodeEmbedMode>(Model);
|
|
|
|
}
|
2016-03-01 09:07:58 +08:00
|
|
|
} else {
|
|
|
|
// claim the bitcode option under LTO so no warning is issued.
|
2016-05-12 00:26:03 +08:00
|
|
|
Args.ClaimAllArgs(options::OPT_fembed_bitcode_EQ);
|
2016-03-01 09:07:58 +08:00
|
|
|
}
|
|
|
|
|
2015-06-23 06:07:27 +08:00
|
|
|
std::unique_ptr<llvm::opt::InputArgList> UArgs =
|
|
|
|
llvm::make_unique<InputArgList>(std::move(Args));
|
|
|
|
|
2010-06-12 06:00:26 +08:00
|
|
|
// Perform the default argument translations.
|
2015-06-23 06:07:27 +08:00
|
|
|
DerivedArgList *TranslatedArgs = TranslateInputArgs(*UArgs);
|
2010-06-12 06:00:26 +08:00
|
|
|
|
2012-01-25 16:49:21 +08:00
|
|
|
// Owned by the host.
|
2016-04-21 18:16:48 +08:00
|
|
|
const ToolChain &TC = getToolChain(
|
|
|
|
*UArgs, computeTargetTriple(*this, DefaultTargetTriple, *UArgs));
|
2012-01-25 16:49:21 +08:00
|
|
|
|
2009-03-18 10:55:38 +08:00
|
|
|
// The compilation takes ownership of Args.
|
2015-06-23 06:07:27 +08:00
|
|
|
Compilation *C = new Compilation(*this, TC, UArgs.release(), TranslatedArgs);
|
2009-03-18 10:55:38 +08:00
|
|
|
|
|
|
|
if (!HandleImmediateArgs(*C))
|
|
|
|
return C;
|
2009-03-13 08:51:18 +08:00
|
|
|
|
2011-08-13 06:08:57 +08:00
|
|
|
// Construct the list of inputs.
|
|
|
|
InputList Inputs;
|
2013-08-14 05:32:29 +08:00
|
|
|
BuildInputs(C->getDefaultToolChain(), *TranslatedArgs, Inputs);
|
2011-08-13 06:08:57 +08:00
|
|
|
|
[CUDA][OpenMP] Create generic offload toolchains
Summary:
This patch introduces the concept of offloading tool chain and offloading kind. Each tool chain may have associated an offloading kind that marks it as used in a given programming model that requires offloading.
It also adds the logic to iterate on the tool chains based on the kind. Currently, only CUDA is supported, but in general a programming model (an offloading kind) may have associated multiple tool chains that require supporting offloading.
This patch does not add tests - its goal is to keep the existing functionality.
This patch is the first of a series of three that attempts to make the current support of CUDA more generic and easier to extend to other programming models, namely OpenMP. It tries to capture the suggestions/improvements/concerns on the initial proposal in http://lists.llvm.org/pipermail/cfe-dev/2016-February/047547.html. It only tackles the more consensual part of the proposal, i.e.does not address the problem of intermediate files bundling yet.
Reviewers: ABataev, jlebar, echristo, hfinkel, tra
Subscribers: guansong, Hahnfeld, andreybokhanko, tcramer, mkuron, cfe-commits, arpith-jacob, carlo.bertolli, caomhin
Differential Revision: http://reviews.llvm.org/D18170
llvm-svn: 272571
2016-06-14 02:10:57 +08:00
|
|
|
// Populate the tool chains for the offloading devices, if any.
|
|
|
|
CreateOffloadingDeviceToolChains(*C, Inputs);
|
2016-03-31 07:30:25 +08:00
|
|
|
|
2012-01-24 18:43:44 +08:00
|
|
|
// Construct the list of abstract actions to perform for this compilation. On
|
2014-01-16 16:48:16 +08:00
|
|
|
// MachO targets this uses the driver-driver and universal actions.
|
|
|
|
if (TC.getTriple().isOSBinFormatMachO())
|
2015-11-18 06:28:40 +08:00
|
|
|
BuildUniversalActions(*C, C->getDefaultToolChain(), Inputs);
|
2009-03-12 15:58:46 +08:00
|
|
|
else
|
2016-02-11 10:00:50 +08:00
|
|
|
BuildActions(*C, C->getArgs(), Inputs, C->getActions());
|
2009-03-12 15:58:46 +08:00
|
|
|
|
2015-06-26 03:37:41 +08:00
|
|
|
if (CCCPrintPhases) {
|
2009-03-18 11:13:20 +08:00
|
|
|
PrintActions(*C);
|
2009-03-18 10:55:38 +08:00
|
|
|
return C;
|
2009-03-12 15:58:46 +08:00
|
|
|
}
|
2009-03-14 01:24:34 +08:00
|
|
|
|
2009-03-18 10:55:38 +08:00
|
|
|
BuildJobs(*C);
|
2009-03-15 09:38:15 +08:00
|
|
|
|
|
|
|
return C;
|
2009-03-11 04:52:46 +08:00
|
|
|
}
|
|
|
|
|
2015-07-09 14:58:31 +08:00
|
|
|
/// Render every argument in \p Args and write the resulting strings to \p OS
/// on a single line, separated by single spaces and terminated by a newline.
/// Each string is emitted through Command::printArg with quoting enabled.
static void printArgList(raw_ostream &OS, const llvm::opt::ArgList &Args) {
  llvm::opt::ArgStringList Rendered;
  for (const auto *A : Args)
    A->render(Args, Rendered);

  bool NeedSeparator = false;
  for (const char *Piece : Rendered) {
    if (NeedSeparator)
      OS << ' ';
    NeedSeparator = true;
    Command::printArg(OS, Piece, true);
  }
  OS << '\n';
}
|
|
|
|
|
2011-08-18 06:59:59 +08:00
|
|
|
// When clang crashes, produce diagnostic information including the fully
|
|
|
|
// preprocessed source file(s). Request that the developer attach the
|
2011-08-03 01:58:04 +08:00
|
|
|
// diagnostic information to a bug report.
|
|
|
|
void Driver::generateCompilationDiagnostics(Compilation &C,
|
2014-10-21 05:02:05 +08:00
|
|
|
const Command &FailingCommand) {
|
2012-02-22 08:30:39 +08:00
|
|
|
if (C.getArgs().hasArg(options::OPT_fno_crash_diagnostics))
|
2012-07-10 01:31:28 +08:00
|
|
|
return;
|
2012-03-07 08:30:40 +08:00
|
|
|
|
2013-02-02 02:30:26 +08:00
|
|
|
// Don't try to generate diagnostics for link or dsymutil jobs.
|
2014-10-21 05:02:05 +08:00
|
|
|
if (FailingCommand.getCreator().isLinkJob() ||
|
|
|
|
FailingCommand.getCreator().isDsymutilJob())
|
2012-02-22 08:30:39 +08:00
|
|
|
return;
|
|
|
|
|
2012-06-20 01:51:34 +08:00
|
|
|
// Print the version of the compiler.
|
|
|
|
PrintVersion(C, llvm::errs());
|
|
|
|
|
2011-08-03 01:58:04 +08:00
|
|
|
Diag(clang::diag::note_drv_command_failed_diag_msg)
|
2015-06-26 23:47:46 +08:00
|
|
|
<< "PLEASE submit a bug report to " BUG_REPORT_URL " and include the "
|
|
|
|
"crash backtrace, preprocessed source, and associated run script.";
|
2011-08-03 01:58:04 +08:00
|
|
|
|
|
|
|
// Suppress driver output and emit preprocessor output to temp file.
|
2013-07-19 04:29:38 +08:00
|
|
|
Mode = CPPMode;
|
2011-08-03 01:58:04 +08:00
|
|
|
CCGenDiagnostics = true;
|
|
|
|
|
2011-11-03 05:29:05 +08:00
|
|
|
// Save the original job command(s).
|
2014-10-22 01:24:44 +08:00
|
|
|
Command Cmd = FailingCommand;
|
2011-11-03 05:29:05 +08:00
|
|
|
|
2012-12-20 10:22:15 +08:00
|
|
|
// Keep track of whether we produce any errors while trying to produce
|
|
|
|
// preprocessed sources.
|
|
|
|
DiagnosticErrorTrap Trap(Diags);
|
|
|
|
|
|
|
|
// Suppress tool output.
|
2011-08-03 01:58:04 +08:00
|
|
|
C.initCompilationForDiagnostics();
|
2011-08-13 06:08:57 +08:00
|
|
|
|
|
|
|
// Construct the list of inputs.
|
|
|
|
InputList Inputs;
|
|
|
|
BuildInputs(C.getDefaultToolChain(), C.getArgs(), Inputs);
|
2011-08-03 01:58:04 +08:00
|
|
|
|
2011-08-13 07:30:05 +08:00
|
|
|
for (InputList::iterator it = Inputs.begin(), ie = Inputs.end(); it != ie;) {
|
2011-08-18 08:22:25 +08:00
|
|
|
bool IgnoreInput = false;
|
|
|
|
|
|
|
|
// Ignore input from stdin or any inputs that cannot be preprocessed.
|
2014-04-29 06:24:44 +08:00
|
|
|
// Check type first as not all linker inputs have a value.
|
2015-06-26 23:47:46 +08:00
|
|
|
if (types::getPreprocessedType(it->first) == types::TY_INVALID) {
|
2014-04-29 06:24:44 +08:00
|
|
|
IgnoreInput = true;
|
|
|
|
} else if (!strcmp(it->second->getValue(), "-")) {
|
2011-08-18 08:22:25 +08:00
|
|
|
Diag(clang::diag::note_drv_command_failed_diag_msg)
|
2015-06-26 23:47:46 +08:00
|
|
|
<< "Error generating preprocessed source(s) - "
|
|
|
|
"ignoring input from stdin.";
|
2011-08-18 08:22:25 +08:00
|
|
|
IgnoreInput = true;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (IgnoreInput) {
|
2011-08-13 07:30:05 +08:00
|
|
|
it = Inputs.erase(it);
|
|
|
|
ie = Inputs.end();
|
2011-08-18 07:08:45 +08:00
|
|
|
} else {
|
2011-08-13 07:30:05 +08:00
|
|
|
++it;
|
2011-08-18 07:08:45 +08:00
|
|
|
}
|
2011-08-13 07:30:05 +08:00
|
|
|
}
|
2011-08-18 08:22:25 +08:00
|
|
|
|
2013-01-30 07:57:10 +08:00
|
|
|
if (Inputs.empty()) {
|
|
|
|
Diag(clang::diag::note_drv_command_failed_diag_msg)
|
2015-06-26 23:47:46 +08:00
|
|
|
<< "Error generating preprocessed source(s) - "
|
|
|
|
"no preprocessable inputs.";
|
2013-01-30 07:57:10 +08:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2011-09-07 07:52:36 +08:00
|
|
|
// Don't attempt to generate preprocessed files if multiple -arch options are
|
2012-02-14 02:16:28 +08:00
|
|
|
// used, unless they're all duplicates.
|
|
|
|
llvm::StringSet<> ArchNames;
|
2014-12-30 03:01:36 +08:00
|
|
|
for (const Arg *A : C.getArgs()) {
|
2011-09-07 07:52:36 +08:00
|
|
|
if (A->getOption().matches(options::OPT_arch)) {
|
2012-11-01 12:30:05 +08:00
|
|
|
StringRef ArchName = A->getValue();
|
2012-02-14 02:16:28 +08:00
|
|
|
ArchNames.insert(ArchName);
|
2011-09-07 07:52:36 +08:00
|
|
|
}
|
|
|
|
}
|
2012-02-14 02:16:28 +08:00
|
|
|
if (ArchNames.size() > 1) {
|
|
|
|
Diag(clang::diag::note_drv_command_failed_diag_msg)
|
2015-06-26 23:47:46 +08:00
|
|
|
<< "Error generating preprocessed source(s) - cannot generate "
|
|
|
|
"preprocessed source with multiple -arch options.";
|
2012-02-14 02:16:28 +08:00
|
|
|
return;
|
|
|
|
}
|
2011-09-07 07:52:36 +08:00
|
|
|
|
2012-01-24 18:43:44 +08:00
|
|
|
// Construct the list of abstract actions to perform for this compilation. On
|
|
|
|
// Darwin OSes this uses the driver-driver and builds universal actions.
|
2012-01-25 16:49:21 +08:00
|
|
|
const ToolChain &TC = C.getDefaultToolChain();
|
2014-01-16 16:48:16 +08:00
|
|
|
if (TC.getTriple().isOSBinFormatMachO())
|
2015-11-18 06:28:40 +08:00
|
|
|
BuildUniversalActions(C, TC, Inputs);
|
2011-08-03 01:58:04 +08:00
|
|
|
else
|
2016-02-11 10:00:50 +08:00
|
|
|
BuildActions(C, C.getArgs(), Inputs, C.getActions());
|
2011-08-03 01:58:04 +08:00
|
|
|
|
|
|
|
BuildJobs(C);
|
|
|
|
|
|
|
|
// If there were errors building the compilation, quit now.
|
2012-12-20 10:22:15 +08:00
|
|
|
if (Trap.hasErrorOccurred()) {
|
2011-08-03 01:58:04 +08:00
|
|
|
Diag(clang::diag::note_drv_command_failed_diag_msg)
|
2015-06-26 23:47:46 +08:00
|
|
|
<< "Error generating preprocessed source(s).";
|
2011-08-03 01:58:04 +08:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Generate preprocessed output.
|
2013-01-30 04:15:05 +08:00
|
|
|
SmallVector<std::pair<int, const Command *>, 4> FailingCommands;
|
2015-07-03 06:52:08 +08:00
|
|
|
C.ExecuteJobs(C.getJobs(), FailingCommands);
|
2011-08-03 01:58:04 +08:00
|
|
|
|
2014-10-21 05:20:27 +08:00
|
|
|
// If any of the preprocessing commands failed, clean up and exit.
|
|
|
|
if (!FailingCommands.empty()) {
|
2015-02-03 06:41:48 +08:00
|
|
|
if (!isSaveTempsEnabled())
|
2011-08-03 01:58:04 +08:00
|
|
|
C.CleanupFileList(C.getTempFiles(), true);
|
|
|
|
|
|
|
|
Diag(clang::diag::note_drv_command_failed_diag_msg)
|
2015-06-26 23:47:46 +08:00
|
|
|
<< "Error generating preprocessed source(s).";
|
2014-10-21 05:20:27 +08:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2014-10-21 05:47:56 +08:00
|
|
|
const ArgStringList &TempFiles = C.getTempFiles();
|
|
|
|
if (TempFiles.empty()) {
|
|
|
|
Diag(clang::diag::note_drv_command_failed_diag_msg)
|
2015-06-26 23:47:46 +08:00
|
|
|
<< "Error generating preprocessed source(s).";
|
2014-10-21 05:47:56 +08:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2014-10-21 05:20:27 +08:00
|
|
|
Diag(clang::diag::note_drv_command_failed_diag_msg)
|
|
|
|
<< "\n********************\n\n"
|
|
|
|
"PLEASE ATTACH THE FOLLOWING FILES TO THE BUG REPORT:\n"
|
|
|
|
"Preprocessed source(s) and associated run script(s) are located at:";
|
|
|
|
|
2014-10-21 06:47:23 +08:00
|
|
|
SmallString<128> VFS;
|
|
|
|
for (const char *TempFile : TempFiles) {
|
2014-10-21 05:47:56 +08:00
|
|
|
Diag(clang::diag::note_drv_command_failed_diag_msg) << TempFile;
|
2014-10-21 06:47:23 +08:00
|
|
|
if (StringRef(TempFile).endswith(".cache")) {
|
|
|
|
// In some cases (modules) we'll dump extra data to help with reproducing
|
|
|
|
// the crash into a directory next to the output.
|
|
|
|
VFS = llvm::sys::path::filename(TempFile);
|
|
|
|
llvm::sys::path::append(VFS, "vfs", "vfs.yaml");
|
|
|
|
}
|
|
|
|
}
|
2014-10-21 05:47:56 +08:00
|
|
|
|
|
|
|
// Assume associated files are based off of the first temporary file.
|
2014-10-22 01:24:44 +08:00
|
|
|
CrashReportInfo CrashInfo(TempFiles[0], VFS);
|
2014-10-21 05:47:56 +08:00
|
|
|
|
2014-10-22 01:24:44 +08:00
|
|
|
std::string Script = CrashInfo.Filename.rsplit('.').first.str() + ".sh";
|
2014-10-21 05:47:56 +08:00
|
|
|
std::error_code EC;
|
|
|
|
llvm::raw_fd_ostream ScriptOS(Script, EC, llvm::sys::fs::F_Excl);
|
|
|
|
if (EC) {
|
|
|
|
Diag(clang::diag::note_drv_command_failed_diag_msg)
|
|
|
|
<< "Error generating run script: " + Script + " " + EC.message();
|
|
|
|
} else {
|
2015-03-12 08:14:35 +08:00
|
|
|
ScriptOS << "# Crash reproducer for " << getClangFullVersion() << "\n"
|
2015-07-09 14:58:31 +08:00
|
|
|
<< "# Driver args: ";
|
|
|
|
printArgList(ScriptOS, C.getInputArgs());
|
|
|
|
ScriptOS << "# Original command: ";
|
2015-03-12 08:14:35 +08:00
|
|
|
Cmd.Print(ScriptOS, "\n", /*Quote=*/true);
|
2014-10-22 02:03:08 +08:00
|
|
|
Cmd.Print(ScriptOS, "\n", /*Quote=*/true, &CrashInfo);
|
2014-10-21 05:47:56 +08:00
|
|
|
Diag(clang::diag::note_drv_command_failed_diag_msg) << Script;
|
2011-08-03 01:58:04 +08:00
|
|
|
}
|
2015-01-12 10:33:09 +08:00
|
|
|
|
|
|
|
for (const auto &A : C.getArgs().filtered(options::OPT_frewrite_map_file,
|
|
|
|
options::OPT_frewrite_map_file_EQ))
|
|
|
|
Diag(clang::diag::note_drv_command_failed_diag_msg) << A->getValue();
|
|
|
|
|
2014-10-21 05:20:27 +08:00
|
|
|
Diag(clang::diag::note_drv_command_failed_diag_msg)
|
|
|
|
<< "\n\n********************";
|
2011-08-03 01:58:04 +08:00
|
|
|
}
|
|
|
|
|
2015-07-03 06:52:08 +08:00
|
|
|
void Driver::setUpResponseFiles(Compilation &C, Command &Cmd) {
|
2016-01-06 03:54:39 +08:00
|
|
|
// Since commandLineFitsWithinSystemLimits() may underestimate system's capacity
|
Teach Clang how to use response files when calling other tools
Patch by Rafael Auler!
This patch addresses PR15171 and teaches Clang how to call other tools
with response files, when the command line exceeds system limits. This
is a problem for Windows systems, whose maximum command-line length is
32kb.
I introduce the concept of "response file support" for each Tool object.
A given Tool may have full support for response files (e.g. MSVC's
link.exe) or only support file names inside response files, but no flags
(e.g. Apple's ld64, as commented in PR15171), or no support at all (the
default case). Therefore, if you implement a toolchain in the clang
driver and you want clang to be able to use response files in your
tools, you must override a method (getReponseFileSupport()) to tell so.
I designed it to support different kinds of tools and
internationalisation needs:
- VS response files ( UTF-16 )
- GNU tools ( uses system's current code page, windows' legacy intl.
support, with escaped backslashes. On unix, fallback to UTF-8 )
- Clang itself ( UTF-16 on windows, UTF-8 on unix )
- ld64 response files ( only a limited file list, UTF-8 on unix )
With this design, I was able to test input file names with spaces and
international characters for Windows. When the linker input is large
enough, it creates a response file with the correct encoding. On a Mac,
to test ld64, I temporarily changed Clang's behavior to always use
response files regardless of the command size limit (avoiding using huge
command line inputs). I tested clang with the LLVM test suite (compiling
benchmarks) and it did fine.
Test Plan: A LIT test that tests proper response files support. This is
tricky, since, for Unix systems, we need a 2MB response file, otherwise
Clang will simply use regular arguments instead of a response file. To
do this, my LIT test generate the file on the fly by cloning many -DTEST
parameters until we have a 2MB file. I found out that processing 2MB of
arguments is pretty slow, it takes 1 minute using my notebook in a debug
build, or 10s in a Release build. Therefore, I also added "REQUIRES:
long_tests", so it will only run when the user wants to run long tests.
In the full discussion in
http://lists.cs.uiuc.edu/pipermail/llvm-commits/Week-of-Mon-20130408/171463.html,
Rafael Espindola discusses a proper way to test
llvm::sys::argumentsFitWithinSystemLimits(), and, there, Chandler
suggests to use 10 times the current system limit (20MB resp file), so
we guarantee that the system will always use response file, even if a
new linux comes up that can handle a few more bytes of arguments.
However, by testing with a 20MB resp file, the test takes long 8 minutes
just to perform a silly check to see if the driver will use a response
file. I found it to be unreasonable. Thus, I discarded this approach and
uses a 2MB response file, which should be enough.
Reviewers: asl, rafael, silvas
Reviewed By: silvas
Subscribers: silvas, rnk, thakis, cfe-commits
Differential Revision: http://reviews.llvm.org/D4897
llvm-svn: 217792
2014-09-16 01:45:39 +08:00
|
|
|
// if the tool does not support response files, there is a chance/ that things
|
|
|
|
// will just work without a response file, so we silently just skip it.
|
2015-07-03 06:52:08 +08:00
|
|
|
if (Cmd.getCreator().getResponseFilesSupport() == Tool::RF_None ||
|
2016-01-06 03:54:39 +08:00
|
|
|
llvm::sys::commandLineFitsWithinSystemLimits(Cmd.getExecutable(), Cmd.getArguments()))
|
Teach Clang how to use response files when calling other tools
Patch by Rafael Auler!
This patch addresses PR15171 and teaches Clang how to call other tools
with response files, when the command line exceeds system limits. This
is a problem for Windows systems, whose maximum command-line length is
32kb.
I introduce the concept of "response file support" for each Tool object.
A given Tool may have full support for response files (e.g. MSVC's
link.exe) or only support file names inside response files, but no flags
(e.g. Apple's ld64, as commented in PR15171), or no support at all (the
default case). Therefore, if you implement a toolchain in the clang
driver and you want clang to be able to use response files in your
tools, you must override a method (getReponseFileSupport()) to tell so.
I designed it to support different kinds of tools and
internationalisation needs:
- VS response files ( UTF-16 )
- GNU tools ( uses system's current code page, windows' legacy intl.
support, with escaped backslashes. On unix, fallback to UTF-8 )
- Clang itself ( UTF-16 on windows, UTF-8 on unix )
- ld64 response files ( only a limited file list, UTF-8 on unix )
With this design, I was able to test input file names with spaces and
international characters for Windows. When the linker input is large
enough, it creates a response file with the correct encoding. On a Mac,
to test ld64, I temporarily changed Clang's behavior to always use
response files regardless of the command size limit (avoiding using huge
command line inputs). I tested clang with the LLVM test suite (compiling
benchmarks) and it did fine.
Test Plan: A LIT test that tests proper response files support. This is
tricky, since, for Unix systems, we need a 2MB response file, otherwise
Clang will simply use regular arguments instead of a response file. To
do this, my LIT test generate the file on the fly by cloning many -DTEST
parameters until we have a 2MB file. I found out that processing 2MB of
arguments is pretty slow, it takes 1 minute using my notebook in a debug
build, or 10s in a Release build. Therefore, I also added "REQUIRES:
long_tests", so it will only run when the user wants to run long tests.
In the full discussion in
http://lists.cs.uiuc.edu/pipermail/llvm-commits/Week-of-Mon-20130408/171463.html,
Rafael Espindola discusses a proper way to test
llvm::sys::argumentsFitWithinSystemLimits(), and, there, Chandler
suggests to use 10 times the current system limit (20MB resp file), so
we guarantee that the system will always use response file, even if a
new linux comes up that can handle a few more bytes of arguments.
However, by testing with a 20MB resp file, the test takes long 8 minutes
just to perform a silly check to see if the driver will use a response
file. I found it to be unreasonable. Thus, I discarded this approach and
uses a 2MB response file, which should be enough.
Reviewers: asl, rafael, silvas
Reviewed By: silvas
Subscribers: silvas, rnk, thakis, cfe-commits
Differential Revision: http://reviews.llvm.org/D4897
llvm-svn: 217792
2014-09-16 01:45:39 +08:00
|
|
|
return;
|
|
|
|
|
|
|
|
std::string TmpName = GetTemporaryPath("response", "txt");
|
2015-07-03 06:52:08 +08:00
|
|
|
Cmd.setResponseFile(
|
2015-06-26 23:47:46 +08:00
|
|
|
C.addTempFile(C.getArgs().MakeArgString(TmpName.c_str())));
|
Teach Clang how to use response files when calling other tools
Patch by Rafael Auler!
This patch addresses PR15171 and teaches Clang how to call other tools
with response files, when the command line exceeds system limits. This
is a problem for Windows systems, whose maximum command-line length is
32kb.
I introduce the concept of "response file support" for each Tool object.
A given Tool may have full support for response files (e.g. MSVC's
link.exe) or only support file names inside response files, but no flags
(e.g. Apple's ld64, as commented in PR15171), or no support at all (the
default case). Therefore, if you implement a toolchain in the clang
driver and you want clang to be able to use response files in your
tools, you must override a method (getReponseFileSupport()) to tell so.
I designed it to support different kinds of tools and
internationalisation needs:
- VS response files ( UTF-16 )
- GNU tools ( uses system's current code page, windows' legacy intl.
support, with escaped backslashes. On unix, fallback to UTF-8 )
- Clang itself ( UTF-16 on windows, UTF-8 on unix )
- ld64 response files ( only a limited file list, UTF-8 on unix )
With this design, I was able to test input file names with spaces and
international characters for Windows. When the linker input is large
enough, it creates a response file with the correct encoding. On a Mac,
to test ld64, I temporarily changed Clang's behavior to always use
response files regardless of the command size limit (avoiding using huge
command line inputs). I tested clang with the LLVM test suite (compiling
benchmarks) and it did fine.
Test Plan: A LIT test that tests proper response files support. This is
tricky, since, for Unix systems, we need a 2MB response file, otherwise
Clang will simply use regular arguments instead of a response file. To
do this, my LIT test generate the file on the fly by cloning many -DTEST
parameters until we have a 2MB file. I found out that processing 2MB of
arguments is pretty slow, it takes 1 minute using my notebook in a debug
build, or 10s in a Release build. Therefore, I also added "REQUIRES:
long_tests", so it will only run when the user wants to run long tests.
In the full discussion in
http://lists.cs.uiuc.edu/pipermail/llvm-commits/Week-of-Mon-20130408/171463.html,
Rafael Espindola discusses a proper way to test
llvm::sys::argumentsFitWithinSystemLimits(), and, there, Chandler
suggests to use 10 times the current system limit (20MB resp file), so
we guarantee that the system will always use response file, even if a
new linux comes up that can handle a few more bytes of arguments.
However, by testing with a 20MB resp file, the test takes long 8 minutes
just to perform a silly check to see if the driver will use a response
file. I found it to be unreasonable. Thus, I discarded this approach and
uses a 2MB response file, which should be enough.
Reviewers: asl, rafael, silvas
Reviewed By: silvas
Subscribers: silvas, rnk, thakis, cfe-commits
Differential Revision: http://reviews.llvm.org/D4897
llvm-svn: 217792
2014-09-16 01:45:39 +08:00
|
|
|
}
|
|
|
|
|
2015-06-26 23:47:46 +08:00
|
|
|
int Driver::ExecuteCompilation(
|
|
|
|
Compilation &C,
|
|
|
|
SmallVectorImpl<std::pair<int, const Command *>> &FailingCommands) {
|
2009-07-02 04:03:04 +08:00
|
|
|
// Just print if -### was present.
|
|
|
|
if (C.getArgs().hasArg(options::OPT__HASH_HASH_HASH)) {
|
2013-09-13 02:23:34 +08:00
|
|
|
C.getJobs().Print(llvm::errs(), "\n", true);
|
2009-07-02 04:03:04 +08:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
// If there were errors building the compilation, quit now.
|
2011-08-03 01:58:04 +08:00
|
|
|
if (Diags.hasErrorOccurred())
|
2009-07-02 04:03:04 +08:00
|
|
|
return 1;
|
|
|
|
|
Teach Clang how to use response files when calling other tools
Patch by Rafael Auler!
This patch addresses PR15171 and teaches Clang how to call other tools
with response files, when the command line exceeds system limits. This
is a problem for Windows systems, whose maximum command-line length is
32kb.
I introduce the concept of "response file support" for each Tool object.
A given Tool may have full support for response files (e.g. MSVC's
link.exe) or only support file names inside response files, but no flags
(e.g. Apple's ld64, as commented in PR15171), or no support at all (the
default case). Therefore, if you implement a toolchain in the clang
driver and you want clang to be able to use response files in your
tools, you must override a method (getReponseFileSupport()) to tell so.
I designed it to support different kinds of tools and
internationalisation needs:
- VS response files ( UTF-16 )
- GNU tools ( uses system's current code page, windows' legacy intl.
support, with escaped backslashes. On unix, fallback to UTF-8 )
- Clang itself ( UTF-16 on windows, UTF-8 on unix )
- ld64 response files ( only a limited file list, UTF-8 on unix )
With this design, I was able to test input file names with spaces and
international characters for Windows. When the linker input is large
enough, it creates a response file with the correct encoding. On a Mac,
to test ld64, I temporarily changed Clang's behavior to always use
response files regardless of the command size limit (avoiding using huge
command line inputs). I tested clang with the LLVM test suite (compiling
benchmarks) and it did fine.
Test Plan: A LIT test that tests proper response files support. This is
tricky, since, for Unix systems, we need a 2MB response file, otherwise
Clang will simply use regular arguments instead of a response file. To
do this, my LIT test generate the file on the fly by cloning many -DTEST
parameters until we have a 2MB file. I found out that processing 2MB of
arguments is pretty slow, it takes 1 minute using my notebook in a debug
build, or 10s in a Release build. Therefore, I also added "REQUIRES:
long_tests", so it will only run when the user wants to run long tests.
In the full discussion in
http://lists.cs.uiuc.edu/pipermail/llvm-commits/Week-of-Mon-20130408/171463.html,
Rafael Espindola discusses a proper way to test
llvm::sys::argumentsFitWithinSystemLimits(), and, there, Chandler
suggests to use 10 times the current system limit (20MB resp file), so
we guarantee that the system will always use response file, even if a
new linux comes up that can handle a few more bytes of arguments.
However, by testing with a 20MB resp file, the test takes long 8 minutes
just to perform a silly check to see if the driver will use a response
file. I found it to be unreasonable. Thus, I discarded this approach and
uses a 2MB response file, which should be enough.
Reviewers: asl, rafael, silvas
Reviewed By: silvas
Subscribers: silvas, rnk, thakis, cfe-commits
Differential Revision: http://reviews.llvm.org/D4897
llvm-svn: 217792
2014-09-16 01:45:39 +08:00
|
|
|
// Set up response file names for each command, if necessary
|
2015-07-03 06:52:08 +08:00
|
|
|
for (auto &Job : C.getJobs())
|
|
|
|
setUpResponseFiles(C, Job);
|
Teach Clang how to use response files when calling other tools
Patch by Rafael Auler!
This patch addresses PR15171 and teaches Clang how to call other tools
with response files, when the command line exceeds system limits. This
is a problem for Windows systems, whose maximum command-line length is
32kb.
I introduce the concept of "response file support" for each Tool object.
A given Tool may have full support for response files (e.g. MSVC's
link.exe) or only support file names inside response files, but no flags
(e.g. Apple's ld64, as commented in PR15171), or no support at all (the
default case). Therefore, if you implement a toolchain in the clang
driver and you want clang to be able to use response files in your
tools, you must override a method (getReponseFileSupport()) to tell so.
I designed it to support different kinds of tools and
internationalisation needs:
- VS response files ( UTF-16 )
- GNU tools ( uses system's current code page, windows' legacy intl.
support, with escaped backslashes. On unix, fallback to UTF-8 )
- Clang itself ( UTF-16 on windows, UTF-8 on unix )
- ld64 response files ( only a limited file list, UTF-8 on unix )
With this design, I was able to test input file names with spaces and
international characters for Windows. When the linker input is large
enough, it creates a response file with the correct encoding. On a Mac,
to test ld64, I temporarily changed Clang's behavior to always use
response files regardless of the command size limit (avoiding using huge
command line inputs). I tested clang with the LLVM test suite (compiling
benchmarks) and it did fine.
Test Plan: A LIT test that tests proper response files support. This is
tricky, since, for Unix systems, we need a 2MB response file, otherwise
Clang will simply use regular arguments instead of a response file. To
do this, my LIT test generate the file on the fly by cloning many -DTEST
parameters until we have a 2MB file. I found out that processing 2MB of
arguments is pretty slow, it takes 1 minute using my notebook in a debug
build, or 10s in a Release build. Therefore, I also added "REQUIRES:
long_tests", so it will only run when the user wants to run long tests.
In the full discussion in
http://lists.cs.uiuc.edu/pipermail/llvm-commits/Week-of-Mon-20130408/171463.html,
Rafael Espindola discusses a proper way to test
llvm::sys::argumentsFitWithinSystemLimits(), and, there, Chandler
suggests to use 10 times the current system limit (20MB resp file), so
we guarantee that the system will always use response file, even if a
new linux comes up that can handle a few more bytes of arguments.
However, by testing with a 20MB resp file, the test takes long 8 minutes
just to perform a silly check to see if the driver will use a response
file. I found it to be unreasonable. Thus, I discarded this approach and
uses a 2MB response file, which should be enough.
Reviewers: asl, rafael, silvas
Reviewed By: silvas
Subscribers: silvas, rnk, thakis, cfe-commits
Differential Revision: http://reviews.llvm.org/D4897
llvm-svn: 217792
2014-09-16 01:45:39 +08:00
|
|
|
|
2015-07-03 06:52:08 +08:00
|
|
|
C.ExecuteJobs(C.getJobs(), FailingCommands);
|
2009-09-09 07:36:43 +08:00
|
|
|
|
2009-07-02 04:03:04 +08:00
|
|
|
// Remove temp files.
|
|
|
|
C.CleanupFileList(C.getTempFiles());
|
|
|
|
|
2010-05-22 08:37:20 +08:00
|
|
|
// If the command succeeded, we are done.
|
2013-01-30 04:15:05 +08:00
|
|
|
if (FailingCommands.empty())
|
|
|
|
return 0;
|
2010-05-22 08:37:20 +08:00
|
|
|
|
2013-01-30 04:15:05 +08:00
|
|
|
// Otherwise, remove result files and print extra information about abnormal
|
|
|
|
// failures.
|
2015-06-26 02:51:37 +08:00
|
|
|
for (const auto &CmdPair : FailingCommands) {
|
|
|
|
int Res = CmdPair.first;
|
|
|
|
const Command *FailingCommand = CmdPair.second;
|
2009-07-02 04:03:04 +08:00
|
|
|
|
2013-01-30 04:15:05 +08:00
|
|
|
// Remove result files if we're not saving temps.
|
2015-02-03 06:41:48 +08:00
|
|
|
if (!isSaveTempsEnabled()) {
|
2013-01-30 04:15:05 +08:00
|
|
|
const JobAction *JA = cast<JobAction>(&FailingCommand->getSource());
|
|
|
|
C.CleanupFileMap(C.getResultFiles(), JA, true);
|
2011-11-21 08:01:05 +08:00
|
|
|
|
2013-01-30 04:15:05 +08:00
|
|
|
// Failure result files are valid unless we crashed.
|
|
|
|
if (Res < 0)
|
|
|
|
C.CleanupFileMap(C.getFailureResultFiles(), JA, true);
|
|
|
|
}
|
2009-07-02 04:03:04 +08:00
|
|
|
|
2013-01-30 04:15:05 +08:00
|
|
|
// Print extra information about abnormal failures, if possible.
|
|
|
|
//
|
|
|
|
// This is ad-hoc, but we don't want to be excessively noisy. If the result
|
2014-06-27 04:59:36 +08:00
|
|
|
// status was 1, assume the command failed normally. In particular, if it
|
2013-01-30 04:15:05 +08:00
|
|
|
// was the compiler then assume it gave a reasonable error code. Failures
|
|
|
|
// in other tools are less common, and they generally have worse
|
|
|
|
// diagnostics, so always print the diagnostic there.
|
|
|
|
const Tool &FailingTool = FailingCommand->getCreator();
|
|
|
|
|
|
|
|
if (!FailingCommand->getCreator().hasGoodDiagnostics() || Res != 1) {
|
|
|
|
// FIXME: See FIXME above regarding result code interpretation.
|
|
|
|
if (Res < 0)
|
|
|
|
Diag(clang::diag::err_drv_command_signalled)
|
2015-06-26 23:47:46 +08:00
|
|
|
<< FailingTool.getShortName();
|
2013-01-30 04:15:05 +08:00
|
|
|
else
|
2015-06-26 23:47:46 +08:00
|
|
|
Diag(clang::diag::err_drv_command_failed) << FailingTool.getShortName()
|
|
|
|
<< Res;
|
2013-01-30 04:15:05 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
return 0;
|
2009-07-02 04:03:04 +08:00
|
|
|
}
|
|
|
|
|
2009-04-16 00:34:29 +08:00
|
|
|
void Driver::PrintHelp(bool ShowHidden) const {
|
2013-07-27 08:23:45 +08:00
|
|
|
unsigned IncludedFlagsBitmask;
|
|
|
|
unsigned ExcludedFlagsBitmask;
|
2014-03-02 21:01:17 +08:00
|
|
|
std::tie(IncludedFlagsBitmask, ExcludedFlagsBitmask) =
|
2015-06-26 23:47:46 +08:00
|
|
|
getIncludeExcludeOptionFlagMasks();
|
2013-07-27 08:23:45 +08:00
|
|
|
|
|
|
|
ExcludedFlagsBitmask |= options::NoDriverOption;
|
|
|
|
if (!ShowHidden)
|
|
|
|
ExcludedFlagsBitmask |= HelpHidden;
|
|
|
|
|
|
|
|
getOpts().PrintHelp(llvm::outs(), Name.c_str(), DriverTitle.c_str(),
|
|
|
|
IncludedFlagsBitmask, ExcludedFlagsBitmask);
|
2009-04-01 05:38:17 +08:00
|
|
|
}
|
|
|
|
|
2011-07-23 18:55:15 +08:00
|
|
|
void Driver::PrintVersion(const Compilation &C, raw_ostream &OS) const {
|
2009-09-09 07:36:43 +08:00
|
|
|
// FIXME: The following handlers should use a callback mechanism, we don't
|
|
|
|
// know what the client would like to do.
|
2010-01-23 10:11:34 +08:00
|
|
|
OS << getClangFullVersion() << '\n';
|
2009-03-27 00:09:13 +08:00
|
|
|
const ToolChain &TC = C.getDefaultToolChain();
|
2009-07-22 04:06:58 +08:00
|
|
|
OS << "Target: " << TC.getTripleString() << '\n';
|
2009-06-17 07:32:58 +08:00
|
|
|
|
|
|
|
// Print the threading model.
|
2014-10-04 05:57:44 +08:00
|
|
|
if (Arg *A = C.getArgs().getLastArg(options::OPT_mthread_model)) {
|
|
|
|
// Don't print if the ToolChain would have barfed on it already
|
|
|
|
if (TC.isThreadModelSupported(A->getValue()))
|
|
|
|
OS << "Thread model: " << A->getValue();
|
|
|
|
} else
|
|
|
|
OS << "Thread model: " << TC.getThreadModel();
|
|
|
|
OS << '\n';
|
2015-08-06 01:07:33 +08:00
|
|
|
|
|
|
|
// Print out the install directory.
|
|
|
|
OS << "InstalledDir: " << InstalledDir << '\n';
|
2009-03-13 08:51:18 +08:00
|
|
|
}
|
|
|
|
|
2010-05-05 13:53:24 +08:00
|
|
|
/// PrintDiagnosticCategories - Implement the --print-diagnostic-categories
|
|
|
|
/// option.
|
2011-07-23 18:55:15 +08:00
|
|
|
static void PrintDiagnosticCategories(raw_ostream &OS) {
|
2011-05-25 13:05:01 +08:00
|
|
|
// Skip the empty category.
|
2015-06-26 23:47:46 +08:00
|
|
|
for (unsigned i = 1, max = DiagnosticIDs::getNumberOfCategories(); i != max;
|
|
|
|
++i)
|
2011-05-25 13:05:01 +08:00
|
|
|
OS << i << ',' << DiagnosticIDs::getCategoryNameFromID(i) << '\n';
|
2010-05-05 13:53:24 +08:00
|
|
|
}
|
|
|
|
|
2009-03-18 10:55:38 +08:00
|
|
|
bool Driver::HandleImmediateArgs(const Compilation &C) {
|
2010-06-12 06:00:19 +08:00
|
|
|
// The order these options are handled in gcc is all over the place, but we
|
2009-09-09 07:36:43 +08:00
|
|
|
// don't expect inconsistencies w.r.t. that to matter in practice.
|
2009-04-01 05:38:17 +08:00
|
|
|
|
2010-09-17 10:47:28 +08:00
|
|
|
if (C.getArgs().hasArg(options::OPT_dumpmachine)) {
|
|
|
|
llvm::outs() << C.getDefaultToolChain().getTripleString() << '\n';
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2009-04-04 13:17:38 +08:00
|
|
|
if (C.getArgs().hasArg(options::OPT_dumpversion)) {
|
2011-01-12 08:43:47 +08:00
|
|
|
// Since -dumpversion is only implemented for pedantic GCC compatibility, we
|
|
|
|
// return an answer which matches our definition of __VERSION__.
|
|
|
|
//
|
|
|
|
// If we want to return a more correct answer some day, then we should
|
|
|
|
// introduce a non-pedantically GCC compatible mode to Clang in which we
|
|
|
|
// provide sensible definitions for -dumpversion, __VERSION__, etc.
|
|
|
|
llvm::outs() << "4.2.1\n";
|
2009-04-04 13:17:38 +08:00
|
|
|
return false;
|
|
|
|
}
|
2010-06-15 05:23:12 +08:00
|
|
|
|
2010-05-05 13:53:24 +08:00
|
|
|
if (C.getArgs().hasArg(options::OPT__print_diagnostic_categories)) {
|
|
|
|
PrintDiagnosticCategories(llvm::outs());
|
|
|
|
return false;
|
|
|
|
}
|
2009-04-04 13:17:38 +08:00
|
|
|
|
2012-05-01 22:57:16 +08:00
|
|
|
if (C.getArgs().hasArg(options::OPT_help) ||
|
2009-04-16 00:34:29 +08:00
|
|
|
C.getArgs().hasArg(options::OPT__help_hidden)) {
|
|
|
|
PrintHelp(C.getArgs().hasArg(options::OPT__help_hidden));
|
2009-04-01 05:38:17 +08:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2009-04-02 23:05:41 +08:00
|
|
|
if (C.getArgs().hasArg(options::OPT__version)) {
|
2009-09-09 07:36:43 +08:00
|
|
|
// Follow gcc behavior and use stdout for --version and stderr for -v.
|
2009-07-22 04:06:58 +08:00
|
|
|
PrintVersion(C, llvm::outs());
|
2009-04-02 23:05:41 +08:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2009-09-09 07:36:43 +08:00
|
|
|
if (C.getArgs().hasArg(options::OPT_v) ||
|
2009-03-18 10:55:38 +08:00
|
|
|
C.getArgs().hasArg(options::OPT__HASH_HASH_HASH)) {
|
2009-07-22 04:06:58 +08:00
|
|
|
PrintVersion(C, llvm::errs());
|
2009-03-13 08:51:18 +08:00
|
|
|
SuppressMissingInputWarning = true;
|
|
|
|
}
|
|
|
|
|
2009-03-18 10:55:38 +08:00
|
|
|
const ToolChain &TC = C.getDefaultToolChain();
|
2013-07-31 01:57:09 +08:00
|
|
|
|
|
|
|
if (C.getArgs().hasArg(options::OPT_v))
|
|
|
|
TC.printVerboseInfo(llvm::errs());
|
|
|
|
|
2009-03-20 12:37:21 +08:00
|
|
|
if (C.getArgs().hasArg(options::OPT_print_search_dirs)) {
|
|
|
|
llvm::outs() << "programs: =";
|
2015-06-24 06:43:50 +08:00
|
|
|
bool separator = false;
|
|
|
|
for (const std::string &Path : TC.getProgramPaths()) {
|
2015-06-26 23:47:46 +08:00
|
|
|
if (separator)
|
|
|
|
llvm::outs() << ':';
|
2015-06-24 06:43:50 +08:00
|
|
|
llvm::outs() << Path;
|
|
|
|
separator = true;
|
2009-03-20 12:37:21 +08:00
|
|
|
}
|
|
|
|
llvm::outs() << "\n";
|
2011-09-06 10:08:31 +08:00
|
|
|
llvm::outs() << "libraries: =" << ResourceDir;
|
2011-07-16 18:50:05 +08:00
|
|
|
|
2012-04-16 12:16:43 +08:00
|
|
|
StringRef sysroot = C.getSysRoot();
|
2011-07-16 18:50:05 +08:00
|
|
|
|
2015-06-24 06:43:50 +08:00
|
|
|
for (const std::string &Path : TC.getFilePaths()) {
|
|
|
|
// Always print a separator. ResourceDir was the first item shown.
|
2011-09-06 10:08:31 +08:00
|
|
|
llvm::outs() << ':';
|
2015-06-24 06:43:50 +08:00
|
|
|
// Interpretation of leading '=' is needed only for NetBSD.
|
|
|
|
if (Path[0] == '=')
|
2015-06-24 23:10:30 +08:00
|
|
|
llvm::outs() << sysroot << Path.substr(1);
|
2011-07-16 18:50:05 +08:00
|
|
|
else
|
2015-06-24 06:43:50 +08:00
|
|
|
llvm::outs() << Path;
|
2009-03-20 12:37:21 +08:00
|
|
|
}
|
|
|
|
llvm::outs() << "\n";
|
2009-04-01 05:38:17 +08:00
|
|
|
return false;
|
2009-03-20 12:37:21 +08:00
|
|
|
}
|
|
|
|
|
2009-09-09 07:36:43 +08:00
|
|
|
// FIXME: The following handlers should use a callback mechanism, we don't
|
|
|
|
// know what the client would like to do.
|
2009-03-18 10:55:38 +08:00
|
|
|
if (Arg *A = C.getArgs().getLastArg(options::OPT_print_file_name_EQ)) {
|
2012-11-01 12:30:05 +08:00
|
|
|
llvm::outs() << GetFilePath(A->getValue(), TC) << "\n";
|
2009-03-13 08:51:18 +08:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2009-03-18 10:55:38 +08:00
|
|
|
if (Arg *A = C.getArgs().getLastArg(options::OPT_print_prog_name_EQ)) {
|
2012-11-01 12:30:05 +08:00
|
|
|
llvm::outs() << GetProgramPath(A->getValue(), TC) << "\n";
|
2009-03-13 08:51:18 +08:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2009-03-18 10:55:38 +08:00
|
|
|
if (C.getArgs().hasArg(options::OPT_print_libgcc_file_name)) {
|
2016-10-10 20:23:40 +08:00
|
|
|
ToolChain::RuntimeLibType RLT = TC.GetRuntimeLibType(C.getArgs());
|
|
|
|
switch (RLT) {
|
|
|
|
case ToolChain::RLT_CompilerRT:
|
|
|
|
llvm::outs() << TC.getCompilerRT(C.getArgs(), "builtins") << "\n";
|
|
|
|
break;
|
|
|
|
case ToolChain::RLT_Libgcc:
|
|
|
|
llvm::outs() << GetFilePath("libgcc.a", TC) << "\n";
|
|
|
|
break;
|
|
|
|
}
|
2009-03-13 08:51:18 +08:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2009-06-17 07:25:22 +08:00
|
|
|
if (C.getArgs().hasArg(options::OPT_print_multi_lib)) {
|
2015-07-01 03:32:57 +08:00
|
|
|
for (const Multilib &Multilib : TC.getMultilibs())
|
|
|
|
llvm::outs() << Multilib << "\n";
|
2009-06-17 07:25:22 +08:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2014-02-12 11:21:20 +08:00
|
|
|
if (C.getArgs().hasArg(options::OPT_print_multi_directory)) {
|
2015-07-01 03:32:57 +08:00
|
|
|
for (const Multilib &Multilib : TC.getMultilibs()) {
|
2015-06-26 02:51:37 +08:00
|
|
|
if (Multilib.gccSuffix().empty())
|
2014-02-12 11:21:20 +08:00
|
|
|
llvm::outs() << ".\n";
|
|
|
|
else {
|
2015-06-26 02:51:37 +08:00
|
|
|
StringRef Suffix(Multilib.gccSuffix());
|
2014-02-12 11:21:20 +08:00
|
|
|
assert(Suffix.front() == '/');
|
|
|
|
llvm::outs() << Suffix.substr(1) << "\n";
|
|
|
|
}
|
2014-02-12 09:36:51 +08:00
|
|
|
}
|
2014-02-12 09:29:25 +08:00
|
|
|
return false;
|
|
|
|
}
|
2009-03-13 08:51:18 +08:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2015-06-11 23:05:22 +08:00
|
|
|
// Display an action graph human-readably. Action A is the "sink" node
|
|
|
|
// and latest-occuring action. Traversal is in pre-order, visiting the
|
|
|
|
// inputs to each action before printing the action itself.
|
2009-09-09 07:36:43 +08:00
|
|
|
static unsigned PrintActions1(const Compilation &C, Action *A,
|
2015-06-26 23:47:46 +08:00
|
|
|
std::map<Action *, unsigned> &Ids) {
|
2015-06-11 23:05:22 +08:00
|
|
|
if (Ids.count(A)) // A was already visited.
|
2009-03-13 20:19:02 +08:00
|
|
|
return Ids[A];
|
2009-09-09 07:36:43 +08:00
|
|
|
|
2009-03-13 20:19:02 +08:00
|
|
|
std::string str;
|
|
|
|
llvm::raw_string_ostream os(str);
|
2009-09-09 07:36:43 +08:00
|
|
|
|
2009-03-13 20:19:02 +08:00
|
|
|
os << Action::getClassName(A->getKind()) << ", ";
|
2009-09-09 07:36:43 +08:00
|
|
|
if (InputAction *IA = dyn_cast<InputAction>(A)) {
|
2012-11-01 12:30:05 +08:00
|
|
|
os << "\"" << IA->getInputArg().getValue() << "\"";
|
2009-03-13 20:19:02 +08:00
|
|
|
} else if (BindArchAction *BIA = dyn_cast<BindArchAction>(A)) {
|
2015-06-26 03:37:41 +08:00
|
|
|
os << '"' << BIA->getArchName() << '"' << ", {"
|
2016-02-24 03:30:43 +08:00
|
|
|
<< PrintActions1(C, *BIA->input_begin(), Ids) << "}";
|
[CUDA][OpenMP] Create generic offload action
Summary:
This patch replaces the CUDA specific action by a generic offload action. The offload action may have multiple dependences classier in “host” and “device”. The way this generic offloading action is used is very similar to what is done today by the CUDA implementation: it is used to set a specific toolchain and architecture to its dependences during the generation of jobs.
This patch also proposes propagating the offloading information through the action graph so that that information can be easily retrieved at any time during the generation of commands. This allows e.g. the "clang tool” to evaluate whether CUDA should be supported for the device or host and ptas to easily retrieve the target architecture.
This is an example of how the action graphs would look like (compilation of a single CUDA file with two GPU architectures)
```
0: input, "cudatests.cu", cuda, (host-cuda)
1: preprocessor, {0}, cuda-cpp-output, (host-cuda)
2: compiler, {1}, ir, (host-cuda)
3: input, "cudatests.cu", cuda, (device-cuda, sm_35)
4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_35)
5: compiler, {4}, ir, (device-cuda, sm_35)
6: backend, {5}, assembler, (device-cuda, sm_35)
7: assembler, {6}, object, (device-cuda, sm_35)
8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {7}, object
9: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {6}, assembler
10: input, "cudatests.cu", cuda, (device-cuda, sm_37)
11: preprocessor, {10}, cuda-cpp-output, (device-cuda, sm_37)
12: compiler, {11}, ir, (device-cuda, sm_37)
13: backend, {12}, assembler, (device-cuda, sm_37)
14: assembler, {13}, object, (device-cuda, sm_37)
15: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {14}, object
16: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {13}, assembler
17: linker, {8, 9, 15, 16}, cuda-fatbin, (device-cuda)
18: offload, "host-cuda (powerpc64le-unknown-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda)" {17}, ir
19: backend, {18}, assembler
20: assembler, {19}, object
21: input, "cuda", object
22: input, "cudart", object
23: linker, {20, 21, 22}, image
```
The changes in this patch pass the existent regression tests (keeps the existent functionality) and resulting binaries execute correctly in a Power8+K40 machine.
Reviewers: echristo, hfinkel, jlebar, ABataev, tra
Subscribers: guansong, andreybokhanko, tcramer, mkuron, cfe-commits, arpith-jacob, carlo.bertolli, caomhin
Differential Revision: https://reviews.llvm.org/D18171
llvm-svn: 275645
2016-07-16 07:13:27 +08:00
|
|
|
} else if (OffloadAction *OA = dyn_cast<OffloadAction>(A)) {
|
|
|
|
bool IsFirst = true;
|
|
|
|
OA->doOnEachDependence(
|
|
|
|
[&](Action *A, const ToolChain *TC, const char *BoundArch) {
|
|
|
|
// E.g. for two CUDA device dependences whose bound arch is sm_20 and
|
|
|
|
// sm_35 this will generate:
|
|
|
|
// "cuda-device" (nvptx64-nvidia-cuda:sm_20) {#ID}, "cuda-device"
|
|
|
|
// (nvptx64-nvidia-cuda:sm_35) {#ID}
|
|
|
|
if (!IsFirst)
|
|
|
|
os << ", ";
|
|
|
|
os << '"';
|
|
|
|
if (TC)
|
|
|
|
os << A->getOffloadingKindPrefix();
|
|
|
|
else
|
|
|
|
os << "host";
|
|
|
|
os << " (";
|
|
|
|
os << TC->getTriple().normalize();
|
|
|
|
|
|
|
|
if (BoundArch)
|
|
|
|
os << ":" << BoundArch;
|
|
|
|
os << ")";
|
|
|
|
os << '"';
|
|
|
|
os << " {" << PrintActions1(C, A, Ids) << "}";
|
|
|
|
IsFirst = false;
|
|
|
|
});
|
2009-03-13 20:19:02 +08:00
|
|
|
} else {
|
[CUDA][OpenMP] Create generic offload action
Summary:
This patch replaces the CUDA specific action by a generic offload action. The offload action may have multiple dependences classier in “host” and “device”. The way this generic offloading action is used is very similar to what is done today by the CUDA implementation: it is used to set a specific toolchain and architecture to its dependences during the generation of jobs.
This patch also proposes propagating the offloading information through the action graph so that that information can be easily retrieved at any time during the generation of commands. This allows e.g. the "clang tool” to evaluate whether CUDA should be supported for the device or host and ptas to easily retrieve the target architecture.
This is an example of how the action graphs would look like (compilation of a single CUDA file with two GPU architectures)
```
0: input, "cudatests.cu", cuda, (host-cuda)
1: preprocessor, {0}, cuda-cpp-output, (host-cuda)
2: compiler, {1}, ir, (host-cuda)
3: input, "cudatests.cu", cuda, (device-cuda, sm_35)
4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_35)
5: compiler, {4}, ir, (device-cuda, sm_35)
6: backend, {5}, assembler, (device-cuda, sm_35)
7: assembler, {6}, object, (device-cuda, sm_35)
8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {7}, object
9: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {6}, assembler
10: input, "cudatests.cu", cuda, (device-cuda, sm_37)
11: preprocessor, {10}, cuda-cpp-output, (device-cuda, sm_37)
12: compiler, {11}, ir, (device-cuda, sm_37)
13: backend, {12}, assembler, (device-cuda, sm_37)
14: assembler, {13}, object, (device-cuda, sm_37)
15: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {14}, object
16: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {13}, assembler
17: linker, {8, 9, 15, 16}, cuda-fatbin, (device-cuda)
18: offload, "host-cuda (powerpc64le-unknown-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda)" {17}, ir
19: backend, {18}, assembler
20: assembler, {19}, object
21: input, "cuda", object
22: input, "cudart", object
23: linker, {20, 21, 22}, image
```
The changes in this patch pass the existent regression tests (keeps the existent functionality) and resulting binaries execute correctly in a Power8+K40 machine.
Reviewers: echristo, hfinkel, jlebar, ABataev, tra
Subscribers: guansong, andreybokhanko, tcramer, mkuron, cfe-commits, arpith-jacob, carlo.bertolli, caomhin
Differential Revision: https://reviews.llvm.org/D18171
llvm-svn: 275645
2016-07-16 07:13:27 +08:00
|
|
|
const ActionList *AL = &A->getInputs();
|
2015-07-14 07:27:56 +08:00
|
|
|
|
2015-09-23 01:23:09 +08:00
|
|
|
if (AL->size()) {
|
|
|
|
const char *Prefix = "{";
|
|
|
|
for (Action *PreRequisite : *AL) {
|
|
|
|
os << Prefix << PrintActions1(C, PreRequisite, Ids);
|
|
|
|
Prefix = ", ";
|
|
|
|
}
|
|
|
|
os << "}";
|
|
|
|
} else
|
|
|
|
os << "{}";
|
2009-03-13 20:19:02 +08:00
|
|
|
}
|
|
|
|
|
[CUDA][OpenMP] Create generic offload action
Summary:
This patch replaces the CUDA specific action by a generic offload action. The offload action may have multiple dependences classier in “host” and “device”. The way this generic offloading action is used is very similar to what is done today by the CUDA implementation: it is used to set a specific toolchain and architecture to its dependences during the generation of jobs.
This patch also proposes propagating the offloading information through the action graph so that that information can be easily retrieved at any time during the generation of commands. This allows e.g. the "clang tool” to evaluate whether CUDA should be supported for the device or host and ptas to easily retrieve the target architecture.
This is an example of how the action graphs would look like (compilation of a single CUDA file with two GPU architectures)
```
0: input, "cudatests.cu", cuda, (host-cuda)
1: preprocessor, {0}, cuda-cpp-output, (host-cuda)
2: compiler, {1}, ir, (host-cuda)
3: input, "cudatests.cu", cuda, (device-cuda, sm_35)
4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_35)
5: compiler, {4}, ir, (device-cuda, sm_35)
6: backend, {5}, assembler, (device-cuda, sm_35)
7: assembler, {6}, object, (device-cuda, sm_35)
8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {7}, object
9: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {6}, assembler
10: input, "cudatests.cu", cuda, (device-cuda, sm_37)
11: preprocessor, {10}, cuda-cpp-output, (device-cuda, sm_37)
12: compiler, {11}, ir, (device-cuda, sm_37)
13: backend, {12}, assembler, (device-cuda, sm_37)
14: assembler, {13}, object, (device-cuda, sm_37)
15: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {14}, object
16: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {13}, assembler
17: linker, {8, 9, 15, 16}, cuda-fatbin, (device-cuda)
18: offload, "host-cuda (powerpc64le-unknown-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda)" {17}, ir
19: backend, {18}, assembler
20: assembler, {19}, object
21: input, "cuda", object
22: input, "cudart", object
23: linker, {20, 21, 22}, image
```
The changes in this patch pass the existent regression tests (keeps the existent functionality) and resulting binaries execute correctly in a Power8+K40 machine.
Reviewers: echristo, hfinkel, jlebar, ABataev, tra
Subscribers: guansong, andreybokhanko, tcramer, mkuron, cfe-commits, arpith-jacob, carlo.bertolli, caomhin
Differential Revision: https://reviews.llvm.org/D18171
llvm-svn: 275645
2016-07-16 07:13:27 +08:00
|
|
|
// Append offload info for all options other than the offloading action
|
|
|
|
// itself (e.g. (cuda-device, sm_20) or (cuda-host)).
|
|
|
|
std::string offload_str;
|
|
|
|
llvm::raw_string_ostream offload_os(offload_str);
|
|
|
|
if (!isa<OffloadAction>(A)) {
|
|
|
|
auto S = A->getOffloadingKindPrefix();
|
|
|
|
if (!S.empty()) {
|
|
|
|
offload_os << ", (" << S;
|
|
|
|
if (A->getOffloadingArch())
|
|
|
|
offload_os << ", " << A->getOffloadingArch();
|
|
|
|
offload_os << ")";
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2009-03-13 20:19:02 +08:00
|
|
|
unsigned Id = Ids.size();
|
|
|
|
Ids[A] = Id;
|
2009-09-09 07:36:43 +08:00
|
|
|
llvm::errs() << Id << ": " << os.str() << ", "
|
[CUDA][OpenMP] Create generic offload action
Summary:
This patch replaces the CUDA specific action by a generic offload action. The offload action may have multiple dependences classier in “host” and “device”. The way this generic offloading action is used is very similar to what is done today by the CUDA implementation: it is used to set a specific toolchain and architecture to its dependences during the generation of jobs.
This patch also proposes propagating the offloading information through the action graph so that that information can be easily retrieved at any time during the generation of commands. This allows e.g. the "clang tool” to evaluate whether CUDA should be supported for the device or host and ptas to easily retrieve the target architecture.
This is an example of how the action graphs would look like (compilation of a single CUDA file with two GPU architectures)
```
0: input, "cudatests.cu", cuda, (host-cuda)
1: preprocessor, {0}, cuda-cpp-output, (host-cuda)
2: compiler, {1}, ir, (host-cuda)
3: input, "cudatests.cu", cuda, (device-cuda, sm_35)
4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_35)
5: compiler, {4}, ir, (device-cuda, sm_35)
6: backend, {5}, assembler, (device-cuda, sm_35)
7: assembler, {6}, object, (device-cuda, sm_35)
8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {7}, object
9: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {6}, assembler
10: input, "cudatests.cu", cuda, (device-cuda, sm_37)
11: preprocessor, {10}, cuda-cpp-output, (device-cuda, sm_37)
12: compiler, {11}, ir, (device-cuda, sm_37)
13: backend, {12}, assembler, (device-cuda, sm_37)
14: assembler, {13}, object, (device-cuda, sm_37)
15: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {14}, object
16: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {13}, assembler
17: linker, {8, 9, 15, 16}, cuda-fatbin, (device-cuda)
18: offload, "host-cuda (powerpc64le-unknown-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda)" {17}, ir
19: backend, {18}, assembler
20: assembler, {19}, object
21: input, "cuda", object
22: input, "cudart", object
23: linker, {20, 21, 22}, image
```
The changes in this patch pass the existent regression tests (keeps the existent functionality) and resulting binaries execute correctly in a Power8+K40 machine.
Reviewers: echristo, hfinkel, jlebar, ABataev, tra
Subscribers: guansong, andreybokhanko, tcramer, mkuron, cfe-commits, arpith-jacob, carlo.bertolli, caomhin
Differential Revision: https://reviews.llvm.org/D18171
llvm-svn: 275645
2016-07-16 07:13:27 +08:00
|
|
|
<< types::getTypeName(A->getType()) << offload_os.str() << "\n";
|
2009-03-13 20:19:02 +08:00
|
|
|
|
|
|
|
return Id;
|
|
|
|
}
|
|
|
|
|
2015-06-11 23:05:22 +08:00
|
|
|
// Print the action graphs in a compilation C.
|
|
|
|
// For example "clang -c file1.c file2.c" is composed of two subgraphs.
|
2009-03-18 11:13:20 +08:00
|
|
|
void Driver::PrintActions(const Compilation &C) const {
|
2015-06-26 03:37:41 +08:00
|
|
|
std::map<Action *, unsigned> Ids;
|
|
|
|
for (Action *A : C.getActions())
|
|
|
|
PrintActions1(C, A, Ids);
|
2009-03-12 15:58:46 +08:00
|
|
|
}
|
|
|
|
|
2011-05-06 22:05:11 +08:00
|
|
|
/// \brief Check whether the given input tree contains any compilation or
|
|
|
|
/// assembly actions.
|
|
|
|
static bool ContainsCompileOrAssembleAction(const Action *A) {
|
2015-06-26 23:47:46 +08:00
|
|
|
if (isa<CompileJobAction>(A) || isa<BackendJobAction>(A) ||
|
Reapply "Change -save-temps to emit unoptimized bitcode files."
This reapplies r224503 along with a fix for compiling Fortran by having the
clang driver invoke gcc (see r224546, where it was reverted). I have added
a testcase for that as well.
Original commit message:
It is often convenient to use -save-temps to collect the intermediate
results of a compilation, e.g., when triaging a bug report. Besides the
temporary files for preprocessed source and assembly code, this adds the
unoptimized bitcode files as well.
This adds a new BackendJobAction, which is mostly mechanical, to run after
the CompileJobAction. When not using -save-temps, the BackendJobAction is
combined into one job with the CompileJobAction, similar to the way the
integrated assembler is handled. I've implemented this entirely as a
driver change, so under the hood, it is just using -disable-llvm-optzns
to get the unoptimized bitcode.
Based in part on a patch by Steven Wu.
rdar://problem/18909437
llvm-svn: 224688
2014-12-21 15:00:00 +08:00
|
|
|
isa<AssembleJobAction>(A))
|
2010-06-30 00:38:33 +08:00
|
|
|
return true;
|
|
|
|
|
2016-02-24 03:30:43 +08:00
|
|
|
for (const Action *Input : A->inputs())
|
2015-09-20 05:36:51 +08:00
|
|
|
if (ContainsCompileOrAssembleAction(Input))
|
2010-06-30 00:38:33 +08:00
|
|
|
return true;
|
|
|
|
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2015-11-18 06:28:40 +08:00
|
|
|
void Driver::BuildUniversalActions(Compilation &C, const ToolChain &TC,
|
|
|
|
const InputList &BAInputs) const {
|
|
|
|
DerivedArgList &Args = C.getArgs();
|
|
|
|
ActionList &Actions = C.getActions();
|
2009-09-09 07:36:43 +08:00
|
|
|
llvm::PrettyStackTraceString CrashInfo("Building universal build actions");
|
|
|
|
// Collect the list of architectures. Duplicates are allowed, but should only
|
|
|
|
// be handled once (in the order seen).
|
2009-03-14 04:33:35 +08:00
|
|
|
llvm::StringSet<> ArchNames;
|
2011-07-23 18:55:15 +08:00
|
|
|
SmallVector<const char *, 4> Archs;
|
2014-12-30 03:01:36 +08:00
|
|
|
for (Arg *A : Args) {
|
2009-11-19 11:26:40 +08:00
|
|
|
if (A->getOption().matches(options::OPT_arch)) {
|
2009-09-09 07:37:30 +08:00
|
|
|
// Validate the option here; we don't save the type here because its
|
|
|
|
// particular spelling may participate in other driver choices.
|
|
|
|
llvm::Triple::ArchType Arch =
|
2015-06-26 23:47:46 +08:00
|
|
|
tools::darwin::getArchTypeForMachOArchName(A->getValue());
|
2009-09-09 07:37:30 +08:00
|
|
|
if (Arch == llvm::Triple::UnknownArch) {
|
2015-06-26 23:47:46 +08:00
|
|
|
Diag(clang::diag::err_drv_invalid_arch_name) << A->getAsString(Args);
|
2009-09-09 07:37:30 +08:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2009-03-19 15:55:12 +08:00
|
|
|
A->claim();
|
2014-11-19 10:56:13 +08:00
|
|
|
if (ArchNames.insert(A->getValue()).second)
|
2012-11-01 12:30:05 +08:00
|
|
|
Archs.push_back(A->getValue());
|
2009-03-13 02:40:18 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2009-09-09 07:36:43 +08:00
|
|
|
// When there is no explicit arch for this platform, make sure we still bind
|
|
|
|
// the architecture (to the default) so that -Xarch_ is handled correctly.
|
2009-03-18 11:13:20 +08:00
|
|
|
if (!Archs.size())
|
2012-11-08 11:38:26 +08:00
|
|
|
Archs.push_back(Args.MakeArgString(TC.getDefaultUniversalArchName()));
|
2009-03-13 02:40:18 +08:00
|
|
|
|
|
|
|
ActionList SingleActions;
|
2016-02-11 10:00:50 +08:00
|
|
|
BuildActions(C, Args, BAInputs, SingleActions);
|
2009-03-13 02:40:18 +08:00
|
|
|
|
2010-06-05 02:28:41 +08:00
|
|
|
// Add in arch bindings for every top level action, as well as lipo and
|
|
|
|
// dsymutil steps if needed.
|
2015-09-20 05:36:51 +08:00
|
|
|
for (Action* Act : SingleActions) {
|
2009-09-09 07:36:43 +08:00
|
|
|
// Make sure we can lipo this kind of output. If not (and it is an actual
|
|
|
|
// output) then we disallow, since we can't create an output file with the
|
|
|
|
// right name without overwriting it. We could remove this oddity by just
|
|
|
|
// changing the output names to include the arch, which would also fix
|
2009-03-13 02:40:18 +08:00
|
|
|
// -save-temps. Compatibility wins for now.
|
|
|
|
|
2009-03-14 01:46:02 +08:00
|
|
|
if (Archs.size() > 1 && !types::canLipoType(Act->getType()))
|
2009-03-13 02:40:18 +08:00
|
|
|
Diag(clang::diag::err_drv_invalid_output_with_multiple_archs)
|
2015-06-26 23:47:46 +08:00
|
|
|
<< types::getTypeName(Act->getType());
|
2009-03-13 02:40:18 +08:00
|
|
|
|
|
|
|
ActionList Inputs;
|
2016-01-12 07:07:27 +08:00
|
|
|
for (unsigned i = 0, e = Archs.size(); i != e; ++i)
|
|
|
|
Inputs.push_back(C.MakeAction<BindArchAction>(Act, Archs[i]));
|
2009-03-13 02:40:18 +08:00
|
|
|
|
2009-09-09 07:36:43 +08:00
|
|
|
// Lipo if necessary, we do it this way because we need to set the arch flag
|
|
|
|
// so that -Xarch_ gets overwritten.
|
2009-03-13 02:40:18 +08:00
|
|
|
if (Inputs.size() == 1 || Act->getType() == types::TY_Nothing)
|
|
|
|
Actions.append(Inputs.begin(), Inputs.end());
|
|
|
|
else
|
2016-01-12 07:07:27 +08:00
|
|
|
Actions.push_back(C.MakeAction<LipoJobAction>(Inputs, Act->getType()));
|
2010-06-05 02:28:41 +08:00
|
|
|
|
2012-02-07 03:43:51 +08:00
|
|
|
// Handle debug info queries.
|
|
|
|
Arg *A = Args.getLastArg(options::OPT_g_Group);
|
2012-04-16 05:22:10 +08:00
|
|
|
if (A && !A->getOption().matches(options::OPT_g0) &&
|
|
|
|
!A->getOption().matches(options::OPT_gstabs) &&
|
|
|
|
ContainsCompileOrAssembleAction(Actions.back())) {
|
2012-07-10 01:31:28 +08:00
|
|
|
|
2012-04-16 05:22:10 +08:00
|
|
|
// Add a 'dsymutil' step if necessary, when debug info is enabled and we
|
|
|
|
// have a compile input. We need to run 'dsymutil' ourselves in such cases
|
2013-01-29 01:39:03 +08:00
|
|
|
// because the debug info will refer to a temporary object file which
|
2012-04-16 05:22:10 +08:00
|
|
|
// will be removed at the end of the compilation process.
|
|
|
|
if (Act->getType() == types::TY_Image) {
|
|
|
|
ActionList Inputs;
|
|
|
|
Inputs.push_back(Actions.back());
|
|
|
|
Actions.pop_back();
|
2016-01-12 07:07:27 +08:00
|
|
|
Actions.push_back(
|
|
|
|
C.MakeAction<DsymutilJobAction>(Inputs, types::TY_dSYM));
|
2010-06-05 02:28:41 +08:00
|
|
|
}
|
2012-04-16 05:22:10 +08:00
|
|
|
|
2014-02-07 02:53:25 +08:00
|
|
|
// Verify the debug info output.
|
2014-01-17 10:06:23 +08:00
|
|
|
if (Args.hasArg(options::OPT_verify_debug_info)) {
|
2016-01-12 07:07:27 +08:00
|
|
|
Action* LastAction = Actions.back();
|
2012-04-16 05:22:10 +08:00
|
|
|
Actions.pop_back();
|
2016-01-12 07:07:27 +08:00
|
|
|
Actions.push_back(C.MakeAction<VerifyDebugInfoJobAction>(
|
|
|
|
LastAction, types::TY_Nothing));
|
2012-04-16 05:22:10 +08:00
|
|
|
}
|
|
|
|
}
|
2009-03-13 02:40:18 +08:00
|
|
|
}
|
2009-03-12 15:58:46 +08:00
|
|
|
}
|
|
|
|
|
2013-08-06 08:20:31 +08:00
|
|
|
/// \brief Check that the file referenced by Value exists. If it doesn't,
|
|
|
|
/// issue a diagnostic and return false.
|
2013-12-03 14:53:35 +08:00
|
|
|
static bool DiagnoseInputExistence(const Driver &D, const DerivedArgList &Args,
|
2016-04-15 09:12:32 +08:00
|
|
|
StringRef Value, types::ID Ty) {
|
2013-08-06 08:20:31 +08:00
|
|
|
if (!D.getCheckInputsExist())
|
|
|
|
return true;
|
|
|
|
|
|
|
|
// stdin always exists.
|
|
|
|
if (Value == "-")
|
|
|
|
return true;
|
|
|
|
|
|
|
|
SmallString<64> Path(Value);
|
|
|
|
if (Arg *WorkDir = Args.getLastArg(options::OPT_working_directory)) {
|
2015-03-18 18:17:07 +08:00
|
|
|
if (!llvm::sys::path::is_absolute(Path)) {
|
2013-08-06 08:20:31 +08:00
|
|
|
SmallString<64> Directory(WorkDir->getValue());
|
|
|
|
llvm::sys::path::append(Directory, Value);
|
|
|
|
Path.assign(Directory);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (llvm::sys::fs::exists(Twine(Path)))
|
|
|
|
return true;
|
|
|
|
|
2016-04-15 09:12:32 +08:00
|
|
|
if (D.IsCLMode()) {
|
|
|
|
if (!llvm::sys::path::is_absolute(Twine(Path)) &&
|
|
|
|
llvm::sys::Process::FindInEnvPath("LIB", Value))
|
|
|
|
return true;
|
|
|
|
|
|
|
|
if (Args.hasArg(options::OPT__SLASH_link) && Ty == types::TY_Object) {
|
|
|
|
// Arguments to the /link flag might cause the linker to search for object
|
|
|
|
// and library files in paths we don't know about. Don't error in such
|
|
|
|
// cases.
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
2014-06-19 01:21:50 +08:00
|
|
|
|
2015-03-18 18:17:07 +08:00
|
|
|
D.Diag(clang::diag::err_drv_no_such_file) << Path;
|
2013-08-06 08:20:31 +08:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2011-08-13 06:08:57 +08:00
|
|
|
// Construct a the list of inputs and their types.
|
2014-05-03 06:55:30 +08:00
|
|
|
void Driver::BuildInputs(const ToolChain &TC, DerivedArgList &Args,
|
2011-08-13 06:08:57 +08:00
|
|
|
InputList &Inputs) const {
|
2009-09-09 07:36:43 +08:00
|
|
|
// Track the current user specified (-x) input. We also explicitly track the
|
|
|
|
// argument used to set the type; we only want to claim the type when we
|
|
|
|
// actually use it, so we warn about unused -x arguments.
|
2009-03-14 01:57:10 +08:00
|
|
|
types::ID InputType = types::TY_Nothing;
|
2014-05-18 00:56:41 +08:00
|
|
|
Arg *InputTypeArg = nullptr;
|
2009-03-14 01:57:10 +08:00
|
|
|
|
2013-08-13 02:34:17 +08:00
|
|
|
// The last /TC or /TP option sets the input type to C or C++ globally.
|
2014-09-13 05:44:24 +08:00
|
|
|
if (Arg *TCTP = Args.getLastArgNoClaim(options::OPT__SLASH_TC,
|
|
|
|
options::OPT__SLASH_TP)) {
|
2013-08-06 08:20:31 +08:00
|
|
|
InputTypeArg = TCTP;
|
2013-08-13 02:34:17 +08:00
|
|
|
InputType = TCTP->getOption().matches(options::OPT__SLASH_TC)
|
2015-06-26 23:47:46 +08:00
|
|
|
? types::TY_C
|
|
|
|
: types::TY_CXX;
|
2013-08-13 02:34:17 +08:00
|
|
|
|
2015-06-26 23:47:46 +08:00
|
|
|
arg_iterator it =
|
|
|
|
Args.filtered_begin(options::OPT__SLASH_TC, options::OPT__SLASH_TP);
|
2013-08-13 02:34:17 +08:00
|
|
|
const arg_iterator ie = Args.filtered_end();
|
|
|
|
Arg *Previous = *it++;
|
|
|
|
bool ShowNote = false;
|
|
|
|
while (it != ie) {
|
2013-09-12 00:38:41 +08:00
|
|
|
Diag(clang::diag::warn_drv_overriding_flag_option)
|
|
|
|
<< Previous->getSpelling() << (*it)->getSpelling();
|
2013-08-13 02:34:17 +08:00
|
|
|
Previous = *it++;
|
|
|
|
ShowNote = true;
|
2013-08-06 08:20:31 +08:00
|
|
|
}
|
2013-08-13 02:34:17 +08:00
|
|
|
if (ShowNote)
|
|
|
|
Diag(clang::diag::note_drv_t_option_is_global);
|
2013-08-06 08:20:31 +08:00
|
|
|
|
|
|
|
// No driver mode exposes -x and /TC or /TP; we don't support mixing them.
|
|
|
|
assert(!Args.hasArg(options::OPT_x) && "-x and /TC or /TP is not allowed");
|
|
|
|
}
|
|
|
|
|
2014-12-30 03:01:36 +08:00
|
|
|
for (Arg *A : Args) {
|
2012-08-21 05:41:17 +08:00
|
|
|
if (A->getOption().getKind() == Option::InputClass) {
|
2012-11-01 12:30:05 +08:00
|
|
|
const char *Value = A->getValue();
|
2009-03-12 15:58:46 +08:00
|
|
|
types::ID Ty = types::TY_INVALID;
|
|
|
|
|
|
|
|
// Infer the input type if necessary.
|
2009-03-14 01:57:10 +08:00
|
|
|
if (InputType == types::TY_Nothing) {
|
|
|
|
// If there was an explicit arg for this, claim it.
|
|
|
|
if (InputTypeArg)
|
|
|
|
InputTypeArg->claim();
|
|
|
|
|
2009-03-12 15:58:46 +08:00
|
|
|
// stdin must be handled specially.
|
|
|
|
if (memcmp(Value, "-", 2) == 0) {
|
2009-09-09 07:36:43 +08:00
|
|
|
// If running with -E, treat as a C input (this changes the builtin
|
|
|
|
// macros, for example). This may be overridden by -ObjC below.
|
2009-03-12 15:58:46 +08:00
|
|
|
//
|
2009-09-09 07:36:43 +08:00
|
|
|
// Otherwise emit an error but still use a valid type to avoid
|
|
|
|
// spurious errors (e.g., no inputs).
|
2013-07-19 04:29:38 +08:00
|
|
|
if (!Args.hasArgNoClaim(options::OPT_E) && !CCCIsCPP())
|
2014-01-29 09:04:40 +08:00
|
|
|
Diag(IsCLMode() ? clang::diag::err_drv_unknown_stdin_type_clang_cl
|
|
|
|
: clang::diag::err_drv_unknown_stdin_type);
|
2009-03-12 15:58:46 +08:00
|
|
|
Ty = types::TY_C;
|
|
|
|
} else {
|
2011-03-17 06:45:02 +08:00
|
|
|
// Otherwise lookup by extension.
|
|
|
|
// Fallback is C if invoked as C preprocessor or Object otherwise.
|
|
|
|
// We use a host hook here because Darwin at least has its own
|
2009-09-09 07:36:43 +08:00
|
|
|
// idea of what .s is.
|
2009-03-12 15:58:46 +08:00
|
|
|
if (const char *Ext = strrchr(Value, '.'))
|
2010-08-02 13:43:56 +08:00
|
|
|
Ty = TC.LookupTypeForExtension(Ext + 1);
|
2009-03-21 07:39:23 +08:00
|
|
|
|
2011-03-17 06:45:02 +08:00
|
|
|
if (Ty == types::TY_INVALID) {
|
2013-07-19 04:29:38 +08:00
|
|
|
if (CCCIsCPP())
|
2011-03-17 06:45:02 +08:00
|
|
|
Ty = types::TY_C;
|
|
|
|
else
|
|
|
|
Ty = types::TY_Object;
|
|
|
|
}
|
2010-02-18 04:32:58 +08:00
|
|
|
|
|
|
|
// If the driver is invoked as C++ compiler (like clang++ or c++) it
|
|
|
|
// should autodetect some input files as C++ for g++ compatibility.
|
2013-07-19 04:29:38 +08:00
|
|
|
if (CCCIsCXX()) {
|
2010-02-18 04:32:58 +08:00
|
|
|
types::ID OldTy = Ty;
|
|
|
|
Ty = types::lookupCXXTypeForCType(Ty);
|
|
|
|
|
|
|
|
if (Ty != OldTy)
|
|
|
|
Diag(clang::diag::warn_drv_treating_input_as_cxx)
|
2015-06-26 23:47:46 +08:00
|
|
|
<< getTypeName(OldTy) << getTypeName(Ty);
|
2010-02-18 04:32:58 +08:00
|
|
|
}
|
2009-03-12 15:58:46 +08:00
|
|
|
}
|
|
|
|
|
2009-05-19 05:47:54 +08:00
|
|
|
// -ObjC and -ObjC++ override the default language, but only for "source
|
|
|
|
// files". We just treat everything that isn't a linker input as a
|
|
|
|
// source file.
|
2009-09-09 07:36:43 +08:00
|
|
|
//
|
2009-05-19 05:47:54 +08:00
|
|
|
// FIXME: Clean this up if we move the phase sequence into the type.
|
2009-03-12 15:58:46 +08:00
|
|
|
if (Ty != types::TY_Object) {
|
|
|
|
if (Args.hasArg(options::OPT_ObjC))
|
|
|
|
Ty = types::TY_ObjC;
|
|
|
|
else if (Args.hasArg(options::OPT_ObjCXX))
|
|
|
|
Ty = types::TY_ObjCXX;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
assert(InputTypeArg && "InputType set w/o InputTypeArg");
|
2014-09-13 02:15:10 +08:00
|
|
|
if (!InputTypeArg->getOption().matches(options::OPT_x)) {
|
|
|
|
// If emulating cl.exe, make sure that /TC and /TP don't affect input
|
|
|
|
// object files.
|
|
|
|
const char *Ext = strrchr(Value, '.');
|
|
|
|
if (Ext && TC.LookupTypeForExtension(Ext + 1) == types::TY_Object)
|
|
|
|
Ty = types::TY_Object;
|
|
|
|
}
|
|
|
|
if (Ty == types::TY_INVALID) {
|
|
|
|
Ty = InputType;
|
|
|
|
InputTypeArg->claim();
|
|
|
|
}
|
2009-03-12 15:58:46 +08:00
|
|
|
}
|
|
|
|
|
2016-04-15 09:12:32 +08:00
|
|
|
if (DiagnoseInputExistence(*this, Args, Value, Ty))
|
2009-03-12 15:58:46 +08:00
|
|
|
Inputs.push_back(std::make_pair(Ty, A));
|
|
|
|
|
2013-08-06 08:20:31 +08:00
|
|
|
} else if (A->getOption().matches(options::OPT__SLASH_Tc)) {
|
|
|
|
StringRef Value = A->getValue();
|
2016-04-15 09:12:32 +08:00
|
|
|
if (DiagnoseInputExistence(*this, Args, Value, types::TY_C)) {
|
2013-08-06 08:20:31 +08:00
|
|
|
Arg *InputArg = MakeInputArg(Args, Opts, A->getValue());
|
|
|
|
Inputs.push_back(std::make_pair(types::TY_C, InputArg));
|
|
|
|
}
|
|
|
|
A->claim();
|
|
|
|
} else if (A->getOption().matches(options::OPT__SLASH_Tp)) {
|
|
|
|
StringRef Value = A->getValue();
|
2016-04-15 09:12:32 +08:00
|
|
|
if (DiagnoseInputExistence(*this, Args, Value, types::TY_CXX)) {
|
2013-08-06 08:20:31 +08:00
|
|
|
Arg *InputArg = MakeInputArg(Args, Opts, A->getValue());
|
|
|
|
Inputs.push_back(std::make_pair(types::TY_CXX, InputArg));
|
|
|
|
}
|
|
|
|
A->claim();
|
2012-10-20 06:37:06 +08:00
|
|
|
} else if (A->getOption().hasFlag(options::LinkerInput)) {
|
2009-09-09 07:36:43 +08:00
|
|
|
// Just treat as object type, we could make a special type for this if
|
|
|
|
// necessary.
|
2009-03-12 15:58:46 +08:00
|
|
|
Inputs.push_back(std::make_pair(types::TY_Object, A));
|
|
|
|
|
2009-11-19 11:26:40 +08:00
|
|
|
} else if (A->getOption().matches(options::OPT_x)) {
|
2009-09-09 07:36:43 +08:00
|
|
|
InputTypeArg = A;
|
2012-11-01 12:30:05 +08:00
|
|
|
InputType = types::lookupTypeForTypeSpecifier(A->getValue());
|
2012-04-07 08:01:31 +08:00
|
|
|
A->claim();
|
2009-03-12 15:58:46 +08:00
|
|
|
|
|
|
|
// Follow gcc behavior and treat as linker input for invalid -x
|
2009-09-09 07:36:43 +08:00
|
|
|
// options. Its not clear why we shouldn't just revert to unknown; but
|
2010-12-18 05:22:33 +08:00
|
|
|
// this isn't very important, we might as well be bug compatible.
|
2009-03-12 15:58:46 +08:00
|
|
|
if (!InputType) {
|
2012-11-01 12:30:05 +08:00
|
|
|
Diag(clang::diag::err_drv_unknown_language) << A->getValue();
|
2009-03-12 15:58:46 +08:00
|
|
|
InputType = types::TY_Object;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2013-07-19 04:29:38 +08:00
|
|
|
if (CCCIsCPP() && Inputs.empty()) {
|
2011-03-07 07:31:01 +08:00
|
|
|
// If called as standalone preprocessor, stdin is processed
|
|
|
|
// if no other input is present.
|
2013-08-06 08:20:31 +08:00
|
|
|
Arg *A = MakeInputArg(Args, Opts, "-");
|
2011-03-07 07:31:01 +08:00
|
|
|
Inputs.push_back(std::make_pair(types::TY_C, A));
|
|
|
|
}
|
2011-08-13 06:08:57 +08:00
|
|
|
}
|
|
|
|
|
[CUDA][OpenMP] Add a generic offload action builder
Summary:
This patch proposes a new class to generate and record action dependences related with offloading. The builder provides three main functionalities:
- Add device dependences to host actions.
- Add host dependence to device actions.
- Register device top-level actions.
The constructor of the builder detects the programming models that should be supported, and generates a specialized builder for each. If a new programming model is to be added in the future, only a new specialized builder has to be implemented.
When the specialized builder is generated, it produces programming-model-specific diagnostics.
A CUDA specialized builder is proposed in the patch that mostly consists of the partition of the current `buildCudaAction` by the three different functionalities.
Reviewers: tra, echristo, ABataev, jlebar, hfinkel
Subscribers: Hahnfeld, whchung, guansong, jlebar, mehdi_amini, andreybokhanko, tcramer, mkuron, cfe-commits, arpith-jacob, carlo.bertolli, caomhin
Differential Revision: https://reviews.llvm.org/D18172
llvm-svn: 282865
2016-09-30 23:34:19 +08:00
|
|
|
namespace {
|
|
|
|
/// Provides a convenient interface for different programming models to generate
|
|
|
|
/// the required device actions.
|
|
|
|
class OffloadingActionBuilder final {
|
|
|
|
/// Flag used to trace errors in the builder.
|
|
|
|
bool IsValid = false;
|
|
|
|
|
|
|
|
/// The compilation that is using this builder.
|
|
|
|
Compilation &C;
|
|
|
|
|
|
|
|
/// Map between an input argument and the offload kinds used to process it.
|
|
|
|
std::map<const Arg *, unsigned> InputArgToOffloadKindMap;
|
|
|
|
|
|
|
|
/// Builder interface. It doesn't build anything or keep any state.
|
|
|
|
class DeviceActionBuilder {
|
|
|
|
public:
|
|
|
|
typedef llvm::SmallVector<phases::ID, phases::MaxNumberOfPhases> PhasesTy;
|
|
|
|
|
|
|
|
enum ActionBuilderReturnCode {
|
|
|
|
// The builder acted successfully on the current action.
|
|
|
|
ABRT_Success,
|
|
|
|
// The builder didn't have to act on the current action.
|
|
|
|
ABRT_Inactive,
|
|
|
|
// The builder was successful and requested the host action to not be
|
|
|
|
// generated.
|
|
|
|
ABRT_Ignore_Host,
|
|
|
|
};
|
|
|
|
|
|
|
|
protected:
|
|
|
|
/// Compilation associated with this builder.
|
|
|
|
Compilation &C;
|
|
|
|
|
|
|
|
/// Tool chains associated with this builder. The same programming
|
|
|
|
/// model may have associated one or more tool chains.
|
|
|
|
SmallVector<const ToolChain *, 2> ToolChains;
|
|
|
|
|
|
|
|
/// The derived arguments associated with this builder.
|
|
|
|
DerivedArgList &Args;
|
|
|
|
|
|
|
|
/// The inputs associated with this builder.
|
|
|
|
const Driver::InputList &Inputs;
|
|
|
|
|
|
|
|
/// The associated offload kind.
|
|
|
|
Action::OffloadKind AssociatedOffloadKind = Action::OFK_None;
|
|
|
|
|
|
|
|
public:
|
|
|
|
DeviceActionBuilder(Compilation &C, DerivedArgList &Args,
|
|
|
|
const Driver::InputList &Inputs,
|
|
|
|
Action::OffloadKind AssociatedOffloadKind)
|
|
|
|
: C(C), Args(Args), Inputs(Inputs),
|
|
|
|
AssociatedOffloadKind(AssociatedOffloadKind) {}
|
|
|
|
virtual ~DeviceActionBuilder() {}
|
|
|
|
|
|
|
|
/// Fill up the array \a DA with all the device dependences that should be
|
|
|
|
/// added to the provided host action \a HostAction. By default it is
|
|
|
|
/// inactive.
|
|
|
|
virtual ActionBuilderReturnCode
|
|
|
|
getDeviceDepences(OffloadAction::DeviceDependences &DA, phases::ID CurPhase,
|
|
|
|
phases::ID FinalPhase, PhasesTy &Phases) {
|
|
|
|
return ABRT_Inactive;
|
|
|
|
}
|
2016-01-12 07:27:13 +08:00
|
|
|
|
[CUDA][OpenMP] Add a generic offload action builder
Summary:
This patch proposes a new class to generate and record action dependences related with offloading. The builder provides three main functionalities:
- Add device dependences to host actions.
- Add host dependence to device actions.
- Register device top-level actions.
The constructor of the builder detect the programming models that should be supported, and generates a specialized builder for each. If a new programming model is to be added in the future, only a new specialized builder has to be implemented.
When the specialized builder is generated, it produces programming-model-specific diagnostics.
A CUDA specialized builder is proposed in the patch that mostly consists of the partition of the current `buildCudaAction` by the three different functionalities.
Reviewers: tra, echristo, ABataev, jlebar, hfinkel
Subscribers: Hahnfeld, whchung, guansong, jlebar, mehdi_amini, andreybokhanko, tcramer, mkuron, cfe-commits, arpith-jacob, carlo.bertolli, caomhin
Differential Revision: https://reviews.llvm.org/D18172
llvm-svn: 282865
2016-09-30 23:34:19 +08:00
|
|
|
/// Update the state to include the provided host action \a HostAction as a
|
|
|
|
/// dependency of the current device action. By default it is inactive.
|
|
|
|
virtual ActionBuilderReturnCode addDeviceDepences(Action *HostAction) {
|
|
|
|
return ABRT_Inactive;
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Append top level actions generated by the builder. Return true if errors
|
|
|
|
/// were found.
|
|
|
|
virtual void appendTopLevelActions(ActionList &AL) {}
|
|
|
|
|
|
|
|
/// Append linker actions generated by the builder. Return true if errors
|
|
|
|
/// were found.
|
|
|
|
virtual void appendLinkDependences(OffloadAction::DeviceDependences &DA) {}
|
|
|
|
|
|
|
|
/// Initialize the builder. Return true if any initialization errors are
|
|
|
|
/// found.
|
|
|
|
virtual bool initialize() { return false; }
|
|
|
|
|
|
|
|
/// Return true if this builder is valid. We have a valid builder if we have
|
|
|
|
/// associated device tool chains.
|
|
|
|
bool isValid() { return !ToolChains.empty(); }
|
|
|
|
|
|
|
|
/// Return the associated offload kind.
|
|
|
|
Action::OffloadKind getAssociatedOffloadKind() {
|
|
|
|
return AssociatedOffloadKind;
|
2015-07-14 07:27:56 +08:00
|
|
|
}
|
[CUDA][OpenMP] Add a generic offload action builder
Summary:
This patch proposes a new class to generate and record action dependences related with offloading. The builder provides three main functionalities:
- Add device dependences to host actions.
- Add host dependence to device actions.
- Register device top-level actions.
The constructor of the builder detect the programming models that should be supported, and generates a specialized builder for each. If a new programming model is to be added in the future, only a new specialized builder has to be implemented.
When the specialized builder is generated, it produces programming-model-specific diagnostics.
A CUDA specialized builder is proposed in the patch that mostly consists of the partition of the current `buildCudaAction` by the three different functionalities.
Reviewers: tra, echristo, ABataev, jlebar, hfinkel
Subscribers: Hahnfeld, whchung, guansong, jlebar, mehdi_amini, andreybokhanko, tcramer, mkuron, cfe-commits, arpith-jacob, carlo.bertolli, caomhin
Differential Revision: https://reviews.llvm.org/D18172
llvm-svn: 282865
2016-09-30 23:34:19 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
/// \brief CUDA action builder. It injects device code in the host backend
|
|
|
|
/// action.
|
|
|
|
class CudaActionBuilder final : public DeviceActionBuilder {
|
|
|
|
/// Flags to signal if the user requested host-only or device-only
|
|
|
|
/// compilation.
|
|
|
|
bool CompileHostOnly = false;
|
|
|
|
bool CompileDeviceOnly = false;
|
|
|
|
|
|
|
|
/// List of GPU architectures to use in this compilation.
|
|
|
|
SmallVector<CudaArch, 4> GpuArchList;
|
|
|
|
|
|
|
|
/// The CUDA actions for the current input.
|
|
|
|
ActionList CudaDeviceActions;
|
|
|
|
|
|
|
|
/// The CUDA fat binary if it was generated for the current input.
|
|
|
|
Action *CudaFatBinary = nullptr;
|
|
|
|
|
|
|
|
/// Flag that is set to true if this builder acted on the current input.
|
|
|
|
bool IsActive = false;
|
|
|
|
|
|
|
|
public:
|
|
|
|
CudaActionBuilder(Compilation &C, DerivedArgList &Args,
|
|
|
|
const Driver::InputList &Inputs)
|
|
|
|
: DeviceActionBuilder(C, Args, Inputs, Action::OFK_Cuda) {}
|
|
|
|
|
|
|
|
ActionBuilderReturnCode
getDeviceDepences(OffloadAction::DeviceDependences &DA, phases::ID CurPhase,
                  phases::ID FinalPhase, PhasesTy &Phases) override {
  // This builder did not act on the current input.
  if (!IsActive)
    return ABRT_Inactive;

  // With no CUDA actions left there are no dependences to create for the
  // host.
  if (CudaDeviceActions.empty())
    return ABRT_Success;

  assert(CudaDeviceActions.size() == GpuArchList.size() &&
         "Expecting one action per GPU architecture.");
  assert(!CompileHostOnly &&
         "Not expecting CUDA actions in host-only compilation.");

  const bool BuildFatBinary =
      CompileDeviceOnly || CurPhase == phases::Backend;
  if (!BuildFatBinary) {
    // If we are past the backend phase and still have a device action, we
    // don't have to do anything as this action is already a device
    // top-level action.
    if (CurPhase > phases::Backend)
      return ABRT_Success;

    assert(CurPhase < phases::Backend && "Generating single CUDA "
                                         "instructions should only occur "
                                         "before the backend phase!");

    // Before the backend phase, just advance the action of each device arch
    // by one phase.
    for (Action *&DeviceAction : CudaDeviceActions)
      DeviceAction =
          C.getDriver().ConstructPhaseAction(C, Args, CurPhase, DeviceAction);

    return ABRT_Success;
  }

  // We are generating code for the device only, or we are in the backend
  // phase: attempt to generate the fat binary. Each arch is compiled to ptx
  // and assembled to cubin, then the cubin *and* the ptx are fed into a
  // device "link" action, which uses fatbinary to combine them into one
  // fatbin. The fatbin is then an input to the host action if not in
  // device-only mode.
  ActionList FatbinInputs;
  const unsigned NumArchs = GpuArchList.size();
  for (unsigned ArchIdx = 0; ArchIdx != NumArchs; ++ArchIdx) {
    Action *&DeviceAction = CudaDeviceActions[ArchIdx];

    // Run the device action through the phases, starting at the current one
    // and stopping at the assemble phase or at the host's final phase,
    // whichever comes first.
    for (phases::ID Ph : Phases) {
      // Skip the phases that were already dealt with.
      if (Ph < CurPhase)
        continue;
      // We have to be consistent with the host final phase.
      if (Ph > FinalPhase)
        break;

      DeviceAction =
          C.getDriver().ConstructPhaseAction(C, Args, Ph, DeviceAction);

      if (Ph == phases::Assemble)
        break;
    }

    // If we didn't reach the assemble phase, we can't generate the fat
    // binary. No fat binary input is collected in device-only mode either.
    if (!isa<AssembleJobAction>(DeviceAction) || CompileDeviceOnly)
      continue;

    Action *AssembleAction = DeviceAction;
    assert(AssembleAction->getType() == types::TY_Object);
    assert(AssembleAction->getInputs().size() == 1);

    Action *BackendAction = AssembleAction->getInputs()[0];
    assert(BackendAction->getType() == types::TY_PP_Asm);

    // Wrap an action in an offload action bound to the current GPU arch and
    // queue it as a fat binary input.
    const auto AddFatbinInput = [&](Action *DeviceDep) {
      OffloadAction::DeviceDependences DDep;
      DDep.add(*DeviceDep, *ToolChains.front(),
               CudaArchToString(GpuArchList[ArchIdx]), Action::OFK_Cuda);
      FatbinInputs.push_back(
          C.MakeAction<OffloadAction>(DDep, DeviceDep->getType()));
    };
    AddFatbinInput(AssembleAction);
    AddFatbinInput(BackendAction);
  }

  // We generate the fat binary if we have device input actions.
  if (!FatbinInputs.empty()) {
    CudaFatBinary =
        C.MakeAction<LinkJobAction>(FatbinInputs, types::TY_CUDA_FATBIN);

    if (!CompileDeviceOnly) {
      DA.add(*CudaFatBinary, *ToolChains.front(), /*BoundArch=*/nullptr,
             Action::OFK_Cuda);
      // Clear the fat binary, it is already a dependence to a host
      // action.
      CudaFatBinary = nullptr;
    }

    // Remove the CUDA actions as they are already connected to a host
    // action or fat binary.
    CudaDeviceActions.clear();
  }

  // We avoid creating host action in device-only mode.
  return CompileDeviceOnly ? ABRT_Ignore_Host : ABRT_Success;
}
|
|
|
|
|
|
|
|
ActionBuilderReturnCode addDeviceDepences(Action *HostAction) override {
  // While generating code for CUDA, we only depend on the host input action
  // to trigger the creation of all the CUDA device actions. For any other
  // host action we simply report whether this builder is acting on the
  // current input.
  auto *HostInput = dyn_cast<InputAction>(HostAction);
  if (!HostInput)
    return IsActive ? ABRT_Success : ABRT_Inactive;

  assert(!GpuArchList.empty() &&
         "We should have at least one GPU architecture.");

  // The builder acts on the current input only when it is a CUDA input;
  // anything else is ignored.
  IsActive = HostInput->getType() == types::TY_CUDA;
  if (!IsActive)
    return ABRT_Inactive;

  // In host-only mode we return 'success' without creating device actions,
  // so that the host uses the CUDA offload kind.
  if (CompileHostOnly)
    return ABRT_Success;

  // Replicate the input once per GPU architecture.
  for (unsigned Remaining = GpuArchList.size(); Remaining != 0; --Remaining)
    CudaDeviceActions.push_back(C.MakeAction<InputAction>(
        HostInput->getInputArg(), types::TY_CUDA_DEVICE));

  return ABRT_Success;
}
|
|
|
|
|
|
|
|
/// Append the pending CUDA device actions to the top level list \p AL, each
/// wrapped in an offload action, and reset the builder's pending state.
void appendTopLevelActions(ActionList &AL) override {
  // Wraps a device action in an offload action bound to the CUDA toolchain
  // and the given architecture, then appends it to the top level list.
  auto WrapAndAppend = [&](Action *DeviceAction, CudaArch Arch) {
    OffloadAction::DeviceDependences DeviceDep;
    DeviceDep.add(*DeviceAction, *ToolChains.front(), CudaArchToString(Arch),
                  Action::OFK_Cuda);
    AL.push_back(
        C.MakeAction<OffloadAction>(DeviceDep, DeviceAction->getType()));
  };

  if (!CudaFatBinary) {
    // No fat binary and no device actions: nothing to append.
    if (CudaDeviceActions.empty())
      return;

    // If we still have CUDA actions at this point, it is because we have a
    // partial compilation, so there must be exactly one action per GPU
    // architecture.
    assert(CudaDeviceActions.size() == GpuArchList.size() &&
           "Expecting one action per GPU architecture.");
    assert(ToolChains.size() == 1 &&
           "Expecting to have a sing CUDA toolchain.");

    unsigned ActionIdx = 0;
    for (CudaArch Arch : GpuArchList)
      WrapAndAppend(CudaDeviceActions[ActionIdx++], Arch);

    CudaDeviceActions.clear();
    return;
  }

  // A fat binary bundles the code of all architectures, so it is the only
  // top level action we need and it is not bound to a specific architecture.
  WrapAndAppend(CudaFatBinary, CudaArch::UNKNOWN);
  CudaDeviceActions.clear();
  CudaFatBinary = nullptr;
}
|
|
|
|
|
|
|
|
bool initialize() override {
|
|
|
|
// We don't need to support CUDA.
|
|
|
|
if (!C.hasOffloadToolChain<Action::OFK_Cuda>())
|
|
|
|
return false;
|
|
|
|
|
|
|
|
const ToolChain *HostTC = C.getSingleOffloadToolChain<Action::OFK_Host>();
|
|
|
|
assert(HostTC && "No toolchain for host compilation.");
|
|
|
|
if (HostTC->getTriple().isNVPTX()) {
|
|
|
|
// We do not support targeting NVPTX for host compilation. Throw
|
|
|
|
// an error and abort pipeline construction early so we don't trip
|
|
|
|
// asserts that assume device-side compilation.
|
|
|
|
C.getDriver().Diag(diag::err_drv_cuda_nvptx_host);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
ToolChains.push_back(C.getSingleOffloadToolChain<Action::OFK_Cuda>());
|
|
|
|
|
|
|
|
Arg *PartialCompilationArg = Args.getLastArg(
|
|
|
|
options::OPT_cuda_host_only, options::OPT_cuda_device_only,
|
|
|
|
options::OPT_cuda_compile_host_device);
|
|
|
|
CompileHostOnly = PartialCompilationArg &&
|
|
|
|
PartialCompilationArg->getOption().matches(
|
|
|
|
options::OPT_cuda_host_only);
|
|
|
|
CompileDeviceOnly = PartialCompilationArg &&
|
|
|
|
PartialCompilationArg->getOption().matches(
|
|
|
|
options::OPT_cuda_device_only);
|
|
|
|
|
|
|
|
// Collect all cuda_gpu_arch parameters, removing duplicates.
|
|
|
|
llvm::SmallSet<CudaArch, 4> GpuArchs;
|
|
|
|
bool Error = false;
|
|
|
|
for (Arg *A : Args) {
|
|
|
|
if (!A->getOption().matches(options::OPT_cuda_gpu_arch_EQ))
|
|
|
|
continue;
|
|
|
|
A->claim();
|
|
|
|
|
|
|
|
const auto &ArchStr = A->getValue();
|
|
|
|
CudaArch Arch = StringToCudaArch(ArchStr);
|
|
|
|
if (Arch == CudaArch::UNKNOWN) {
|
|
|
|
C.getDriver().Diag(clang::diag::err_drv_cuda_bad_gpu_arch) << ArchStr;
|
|
|
|
Error = true;
|
|
|
|
} else if (GpuArchs.insert(Arch).second)
|
|
|
|
GpuArchList.push_back(Arch);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Default to sm_20 which is the lowest common denominator for supported
|
|
|
|
// GPUs.
|
|
|
|
// sm_20 code should work correctly, if suboptimally, on all newer GPUs.
|
|
|
|
if (GpuArchList.empty())
|
|
|
|
GpuArchList.push_back(CudaArch::SM_20);
|
|
|
|
|
|
|
|
return Error;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
/// Add the implementation for other specialized builders here.
|
|
|
|
|
|
|
|
/// Specialized builders being used by this offloading action builder.
/// The pointers are owning: each builder is allocated with `new` in the
/// constructor and released with `delete` in the destructor.
|
|
|
|
SmallVector<DeviceActionBuilder *, 4> SpecializedBuilders;
|
|
|
|
|
|
|
|
public:
|
|
|
|
/// Create one specialized builder per supported programming model and
/// initialize them. The action builder is marked invalid as soon as one of
/// the specialized builders reports an error during initialization.
OffloadingActionBuilder(Compilation &C, DerivedArgList &Args,
                        const Driver::InputList &Inputs)
    : C(C) {
  IsValid = true;

  // Specialized builder for CUDA.
  SpecializedBuilders.push_back(new CudaActionBuilder(C, Args, Inputs));

  //
  // TODO: Build other specialized builders here.
  //

  // Initialize the builders in order. initialize() returns true on error;
  // the first failure invalidates this builder and the remaining specialized
  // builders are left uninitialized.
  for (DeviceActionBuilder *Builder : SpecializedBuilders) {
    if (Builder->initialize()) {
      IsValid = false;
      break;
    }
  }
}
|
|
|
|
|
|
|
|
/// Release every specialized builder owned by this action builder.
~OffloadingActionBuilder() {
  llvm::DeleteContainerPointers(SpecializedBuilders);
}
|
2015-07-14 07:27:56 +08:00
|
|
|
|
[CUDA][OpenMP] Add a generic offload action builder
Summary:
This patch proposes a new class to generate and record action dependences related with offloading. The builder provides three main functionalities:
- Add device dependences to host actions.
- Add host dependence to device actions.
- Register device top-level actions.
The constructor of the builder detect the programming models that should be supported, and generates a specialized builder for each. If a new programming model is to be added in the future, only a new specialized builder has to be implemented.
When the specialized builder is generated, it produces programming-model-specific diagnostics.
A CUDA specialized builder is proposed in the patch that mostly consists of the partition of the current `buildCudaAction` by the three different functionalities.
Reviewers: tra, echristo, ABataev, jlebar, hfinkel
Subscribers: Hahnfeld, whchung, guansong, jlebar, mehdi_amini, andreybokhanko, tcramer, mkuron, cfe-commits, arpith-jacob, carlo.bertolli, caomhin
Differential Revision: https://reviews.llvm.org/D18172
llvm-svn: 282865
2016-09-30 23:34:19 +08:00
|
|
|
/// Generate an action that adds device dependences (if any) to a host action.
|
|
|
|
/// If no device dependence actions exist, just return the host action \a
|
|
|
|
/// HostAction. If an error is found or if no builder requires the host action
|
|
|
|
/// to be generated, return nullptr.
|
|
|
|
Action *
|
|
|
|
addDeviceDependencesToHostAction(Action *HostAction, const Arg *InputArg,
|
|
|
|
phases::ID CurPhase, phases::ID FinalPhase,
|
|
|
|
DeviceActionBuilder::PhasesTy &Phases) {
|
|
|
|
if (!IsValid)
|
|
|
|
return nullptr;
|
|
|
|
|
|
|
|
if (SpecializedBuilders.empty())
|
|
|
|
return HostAction;
|
|
|
|
|
|
|
|
assert(HostAction && "Invalid host action!");
|
|
|
|
|
|
|
|
OffloadAction::DeviceDependences DDeps;
|
|
|
|
// Check if all the programming models agree we should not emit the host
|
|
|
|
// action. Also, keep track of the offloading kinds employed.
|
|
|
|
auto &OffloadKind = InputArgToOffloadKindMap[InputArg];
|
|
|
|
unsigned InactiveBuilders = 0u;
|
|
|
|
unsigned IgnoringBuilders = 0u;
|
|
|
|
for (auto *SB : SpecializedBuilders) {
|
|
|
|
if (!SB->isValid()) {
|
|
|
|
++InactiveBuilders;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
auto RetCode = SB->getDeviceDepences(DDeps, CurPhase, FinalPhase, Phases);
|
|
|
|
|
|
|
|
// If the builder explicitly says the host action should be ignored,
|
|
|
|
// we need to increment the variable that tracks the builders that request
|
|
|
|
// the host object to be ignored.
|
|
|
|
if (RetCode == DeviceActionBuilder::ABRT_Ignore_Host)
|
|
|
|
++IgnoringBuilders;
|
|
|
|
|
|
|
|
// Unless the builder was inactive for this action, we have to record the
|
|
|
|
// offload kind because the host will have to use it.
|
|
|
|
if (RetCode != DeviceActionBuilder::ABRT_Inactive)
|
|
|
|
OffloadKind |= SB->getAssociatedOffloadKind();
|
[CUDA][OpenMP] Create generic offload action
Summary:
This patch replaces the CUDA specific action by a generic offload action. The offload action may have multiple dependences classier in “host” and “device”. The way this generic offloading action is used is very similar to what is done today by the CUDA implementation: it is used to set a specific toolchain and architecture to its dependences during the generation of jobs.
This patch also proposes propagating the offloading information through the action graph so that that information can be easily retrieved at any time during the generation of commands. This allows e.g. the "clang tool” to evaluate whether CUDA should be supported for the device or host and ptas to easily retrieve the target architecture.
This is an example of how the action graphs would look like (compilation of a single CUDA file with two GPU architectures)
```
0: input, "cudatests.cu", cuda, (host-cuda)
1: preprocessor, {0}, cuda-cpp-output, (host-cuda)
2: compiler, {1}, ir, (host-cuda)
3: input, "cudatests.cu", cuda, (device-cuda, sm_35)
4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_35)
5: compiler, {4}, ir, (device-cuda, sm_35)
6: backend, {5}, assembler, (device-cuda, sm_35)
7: assembler, {6}, object, (device-cuda, sm_35)
8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {7}, object
9: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {6}, assembler
10: input, "cudatests.cu", cuda, (device-cuda, sm_37)
11: preprocessor, {10}, cuda-cpp-output, (device-cuda, sm_37)
12: compiler, {11}, ir, (device-cuda, sm_37)
13: backend, {12}, assembler, (device-cuda, sm_37)
14: assembler, {13}, object, (device-cuda, sm_37)
15: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {14}, object
16: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {13}, assembler
17: linker, {8, 9, 15, 16}, cuda-fatbin, (device-cuda)
18: offload, "host-cuda (powerpc64le-unknown-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda)" {17}, ir
19: backend, {18}, assembler
20: assembler, {19}, object
21: input, "cuda", object
22: input, "cudart", object
23: linker, {20, 21, 22}, image
```
The changes in this patch pass the existent regression tests (keeps the existent functionality) and resulting binaries execute correctly in a Power8+K40 machine.
Reviewers: echristo, hfinkel, jlebar, ABataev, tra
Subscribers: guansong, andreybokhanko, tcramer, mkuron, cfe-commits, arpith-jacob, carlo.bertolli, caomhin
Differential Revision: https://reviews.llvm.org/D18171
llvm-svn: 275645
2016-07-16 07:13:27 +08:00
|
|
|
}
|
[CUDA][OpenMP] Add a generic offload action builder
Summary:
This patch proposes a new class to generate and record action dependences related with offloading. The builder provides three main functionalities:
- Add device dependences to host actions.
- Add host dependence to device actions.
- Register device top-level actions.
The constructor of the builder detect the programming models that should be supported, and generates a specialized builder for each. If a new programming model is to be added in the future, only a new specialized builder has to be implemented.
When the specialized builder is generated, it produces programming-model-specific diagnostics.
A CUDA specialized builder is proposed in the patch that mostly consists of the partition of the current `buildCudaAction` by the three different functionalities.
Reviewers: tra, echristo, ABataev, jlebar, hfinkel
Subscribers: Hahnfeld, whchung, guansong, jlebar, mehdi_amini, andreybokhanko, tcramer, mkuron, cfe-commits, arpith-jacob, carlo.bertolli, caomhin
Differential Revision: https://reviews.llvm.org/D18172
llvm-svn: 282865
2016-09-30 23:34:19 +08:00
|
|
|
|
|
|
|
// If all builders agree that the host object should be ignored, just return
|
|
|
|
// nullptr.
|
|
|
|
if (IgnoringBuilders &&
|
|
|
|
SpecializedBuilders.size() == (InactiveBuilders + IgnoringBuilders))
|
2016-01-12 07:07:27 +08:00
|
|
|
return nullptr;
|
[CUDA][OpenMP] Add a generic offload action builder
Summary:
This patch proposes a new class to generate and record action dependences related with offloading. The builder provides three main functionalities:
- Add device dependences to host actions.
- Add host dependence to device actions.
- Register device top-level actions.
The constructor of the builder detect the programming models that should be supported, and generates a specialized builder for each. If a new programming model is to be added in the future, only a new specialized builder has to be implemented.
When the specialized builder is generated, it produces programming-model-specific diagnostics.
A CUDA specialized builder is proposed in the patch that mostly consists of the partition of the current `buildCudaAction` by the three different functionalities.
Reviewers: tra, echristo, ABataev, jlebar, hfinkel
Subscribers: Hahnfeld, whchung, guansong, jlebar, mehdi_amini, andreybokhanko, tcramer, mkuron, cfe-commits, arpith-jacob, carlo.bertolli, caomhin
Differential Revision: https://reviews.llvm.org/D18172
llvm-svn: 282865
2016-09-30 23:34:19 +08:00
|
|
|
|
|
|
|
if (DDeps.getActions().empty())
|
|
|
|
return HostAction;
|
|
|
|
|
|
|
|
// We have dependences we need to bundle together. We use an offload action
|
|
|
|
// for that.
|
|
|
|
OffloadAction::HostDependence HDep(
|
|
|
|
*HostAction, *C.getSingleOffloadToolChain<Action::OFK_Host>(),
|
|
|
|
/*BoundArch=*/nullptr, DDeps);
|
|
|
|
return C.MakeAction<OffloadAction>(HDep, DDeps);
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Generate an action that adds a host dependence to a device action. The
|
|
|
|
/// results will be kept in this action builder. Return true if an error was
|
|
|
|
/// found.
|
|
|
|
bool addHostDependenceToDeviceActions(Action *HostAction,
|
|
|
|
const Arg *InputArg) {
|
|
|
|
if (!IsValid)
|
|
|
|
return true;
|
|
|
|
|
|
|
|
assert(HostAction && "Invalid host action!");
|
|
|
|
|
|
|
|
// Register the offload kinds that are used.
|
|
|
|
auto &OffloadKind = InputArgToOffloadKindMap[InputArg];
|
|
|
|
for (auto *SB : SpecializedBuilders) {
|
|
|
|
if (!SB->isValid())
|
|
|
|
continue;
|
|
|
|
|
|
|
|
auto RetCode = SB->addDeviceDepences(HostAction);
|
|
|
|
|
|
|
|
// Host dependences for device actions are not compatible with that same
|
|
|
|
// action being ignored.
|
|
|
|
assert(RetCode != DeviceActionBuilder::ABRT_Ignore_Host &&
|
|
|
|
"Host dependence not expected to be ignored.!");
|
|
|
|
|
|
|
|
// Unless the builder was inactive for this action, we have to record the
|
|
|
|
// offload kind because the host will have to use it.
|
|
|
|
if (RetCode != DeviceActionBuilder::ABRT_Inactive)
|
|
|
|
OffloadKind |= SB->getAssociatedOffloadKind();
|
2016-01-15 05:41:27 +08:00
|
|
|
}
|
[CUDA][OpenMP] Add a generic offload action builder
Summary:
This patch proposes a new class to generate and record action dependences related with offloading. The builder provides three main functionalities:
- Add device dependences to host actions.
- Add host dependence to device actions.
- Register device top-level actions.
The constructor of the builder detect the programming models that should be supported, and generates a specialized builder for each. If a new programming model is to be added in the future, only a new specialized builder has to be implemented.
When the specialized builder is generated, it produces programming-model-specific diagnostics.
A CUDA specialized builder is proposed in the patch that mostly consists of the partition of the current `buildCudaAction` by the three different functionalities.
Reviewers: tra, echristo, ABataev, jlebar, hfinkel
Subscribers: Hahnfeld, whchung, guansong, jlebar, mehdi_amini, andreybokhanko, tcramer, mkuron, cfe-commits, arpith-jacob, carlo.bertolli, caomhin
Differential Revision: https://reviews.llvm.org/D18172
llvm-svn: 282865
2016-09-30 23:34:19 +08:00
|
|
|
|
|
|
|
return false;
|
2016-01-15 05:41:27 +08:00
|
|
|
}
|
[CUDA][OpenMP] Create generic offload action
Summary:
This patch replaces the CUDA specific action by a generic offload action. The offload action may have multiple dependences classified as “host” and “device”. The way this generic offloading action is used is very similar to what is done today by the CUDA implementation: it is used to set a specific toolchain and architecture to its dependences during the generation of jobs.
This patch also proposes propagating the offloading information through the action graph so that this information can be easily retrieved at any time during the generation of commands. This allows e.g. the “clang tool” to evaluate whether CUDA should be supported for the device or host and ptxas to easily retrieve the target architecture.
This is an example of how the action graphs would look like (compilation of a single CUDA file with two GPU architectures)
```
0: input, "cudatests.cu", cuda, (host-cuda)
1: preprocessor, {0}, cuda-cpp-output, (host-cuda)
2: compiler, {1}, ir, (host-cuda)
3: input, "cudatests.cu", cuda, (device-cuda, sm_35)
4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_35)
5: compiler, {4}, ir, (device-cuda, sm_35)
6: backend, {5}, assembler, (device-cuda, sm_35)
7: assembler, {6}, object, (device-cuda, sm_35)
8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {7}, object
9: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {6}, assembler
10: input, "cudatests.cu", cuda, (device-cuda, sm_37)
11: preprocessor, {10}, cuda-cpp-output, (device-cuda, sm_37)
12: compiler, {11}, ir, (device-cuda, sm_37)
13: backend, {12}, assembler, (device-cuda, sm_37)
14: assembler, {13}, object, (device-cuda, sm_37)
15: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {14}, object
16: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {13}, assembler
17: linker, {8, 9, 15, 16}, cuda-fatbin, (device-cuda)
18: offload, "host-cuda (powerpc64le-unknown-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda)" {17}, ir
19: backend, {18}, assembler
20: assembler, {19}, object
21: input, "cuda", object
22: input, "cudart", object
23: linker, {20, 21, 22}, image
```
The changes in this patch pass the existent regression tests (keeps the existent functionality) and resulting binaries execute correctly in a Power8+K40 machine.
Reviewers: echristo, hfinkel, jlebar, ABataev, tra
Subscribers: guansong, andreybokhanko, tcramer, mkuron, cfe-commits, arpith-jacob, carlo.bertolli, caomhin
Differential Revision: https://reviews.llvm.org/D18171
llvm-svn: 275645
2016-07-16 07:13:27 +08:00
|
|
|
|
[CUDA][OpenMP] Add a generic offload action builder
Summary:
This patch proposes a new class to generate and record action dependences related with offloading. The builder provides three main functionalities:
- Add device dependences to host actions.
- Add host dependence to device actions.
- Register device top-level actions.
The constructor of the builder detect the programming models that should be supported, and generates a specialized builder for each. If a new programming model is to be added in the future, only a new specialized builder has to be implemented.
When the specialized builder is generated, it produces programming-model-specific diagnostics.
A CUDA specialized builder is proposed in the patch that mostly consists of the partition of the current `buildCudaAction` by the three different functionalities.
Reviewers: tra, echristo, ABataev, jlebar, hfinkel
Subscribers: Hahnfeld, whchung, guansong, jlebar, mehdi_amini, andreybokhanko, tcramer, mkuron, cfe-commits, arpith-jacob, carlo.bertolli, caomhin
Differential Revision: https://reviews.llvm.org/D18172
llvm-svn: 282865
2016-09-30 23:34:19 +08:00
|
|
|
/// Add the offloading top level actions to the provided action list.
|
|
|
|
bool appendTopLevelActions(ActionList &AL, Action *HostAction,
|
|
|
|
const Arg *InputArg) {
|
|
|
|
for (auto *SB : SpecializedBuilders) {
|
|
|
|
if (!SB->isValid())
|
|
|
|
continue;
|
|
|
|
SB->appendTopLevelActions(AL);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Propagate to the current host action (if any) the offload information
|
|
|
|
// associated with the current input.
|
|
|
|
if (HostAction)
|
|
|
|
HostAction->propagateHostOffloadInfo(InputArgToOffloadKindMap[InputArg],
|
|
|
|
/*BoundArch=*/nullptr);
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Processes the host linker action. This currently consists of replacing it
|
|
|
|
/// with an offload action if there are device link objects and propagate to
|
|
|
|
/// the host action all the offload kinds used in the current compilation. The
|
|
|
|
/// resulting action is returned.
|
|
|
|
Action *processHostLinkAction(Action *HostAction) {
|
|
|
|
// Add all the dependences from the device linking actions.
|
|
|
|
OffloadAction::DeviceDependences DDeps;
|
|
|
|
for (auto *SB : SpecializedBuilders) {
|
|
|
|
if (!SB->isValid())
|
|
|
|
continue;
|
|
|
|
|
|
|
|
SB->appendLinkDependences(DDeps);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Calculate all the offload kinds used in the current compilation.
|
|
|
|
unsigned ActiveOffloadKinds = 0u;
|
|
|
|
for (auto &I : InputArgToOffloadKindMap)
|
|
|
|
ActiveOffloadKinds |= I.second;
|
|
|
|
|
|
|
|
// If we don't have device dependencies, we don't have to create an offload
|
|
|
|
// action.
|
|
|
|
if (DDeps.getActions().empty()) {
|
|
|
|
// Propagate all the active kinds to host action. Given that it is a link
|
|
|
|
// action it is assumed to depend on all actions generated so far.
|
|
|
|
HostAction->propagateHostOffloadInfo(ActiveOffloadKinds,
|
|
|
|
/*BoundArch=*/nullptr);
|
|
|
|
return HostAction;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Create the offload action with all dependences. When an offload action
|
|
|
|
// is created the kinds are propagated to the host action, so we don't have
|
[Driver][CUDA][OpenMP] Reimplement tool selection in the driver.
Summary:
This creates a tool selector in the driver that replaces the existing one. The goal is to better organize the code and make the selector easier to scale, in particular in the presence of offload actions that can be collapsed.
The current implementation became more confusing when the support for offloading actions was added. This concern was expressed by Eric in http://reviews.llvm.org/D9888.
This patch does not add new testing, it preserves the existing functionality.
Reviewers: echristo, tra, jlebar, rsmith, ABataev, hfinkel
Subscribers: whchung, guansong, mkuron, mehdi_amini, cfe-commits, Hahnfeld, andreybokhanko, caomhin, arpith-jacob, carlo.bertolli
Differential Revision: https://reviews.llvm.org/D21840
llvm-svn: 285307
2016-10-28 00:29:20 +08:00
|
|
|
// to do that explicitly here.
|
[CUDA][OpenMP] Add a generic offload action builder
Summary:
This patch proposes a new class to generate and record action dependences related with offloading. The builder provides three main functionalities:
- Add device dependences to host actions.
- Add host dependence to device actions.
- Register device top-level actions.
The constructor of the builder detect the programming models that should be supported, and generates a specialized builder for each. If a new programming model is to be added in the future, only a new specialized builder has to be implemented.
When the specialized builder is generated, it produces programming-model-specific diagnostics.
A CUDA specialized builder is proposed in the patch that mostly consists of the partition of the current `buildCudaAction` by the three different functionalities.
Reviewers: tra, echristo, ABataev, jlebar, hfinkel
Subscribers: Hahnfeld, whchung, guansong, jlebar, mehdi_amini, andreybokhanko, tcramer, mkuron, cfe-commits, arpith-jacob, carlo.bertolli, caomhin
Differential Revision: https://reviews.llvm.org/D18172
llvm-svn: 282865
2016-09-30 23:34:19 +08:00
|
|
|
OffloadAction::HostDependence HDep(
|
|
|
|
*HostAction, *C.getSingleOffloadToolChain<Action::OFK_Host>(),
|
|
|
|
/*BoundArch*/ nullptr, ActiveOffloadKinds);
|
|
|
|
return C.MakeAction<OffloadAction>(HDep, DDeps);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
} // anonymous namespace.
|
2015-07-14 07:27:56 +08:00
|
|
|
|
2016-02-11 10:00:50 +08:00
|
|
|
void Driver::BuildActions(Compilation &C, DerivedArgList &Args,
|
|
|
|
const InputList &Inputs, ActionList &Actions) const {
|
2011-08-13 06:08:57 +08:00
|
|
|
llvm::PrettyStackTraceString CrashInfo("Building compilation actions");
|
2011-03-07 07:31:01 +08:00
|
|
|
|
2009-03-13 08:17:48 +08:00
|
|
|
if (!SuppressMissingInputWarning && Inputs.empty()) {
|
2009-03-13 07:55:14 +08:00
|
|
|
Diag(clang::diag::err_drv_no_input_files);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2011-07-28 07:36:45 +08:00
|
|
|
Arg *FinalPhaseArg;
|
|
|
|
phases::ID FinalPhase = getFinalPhase(Args, &FinalPhaseArg);
|
2009-03-13 07:55:14 +08:00
|
|
|
|
2013-08-25 22:27:09 +08:00
|
|
|
if (FinalPhase == phases::Link && Args.hasArg(options::OPT_emit_llvm)) {
|
|
|
|
Diag(clang::diag::err_drv_emit_llvm_link);
|
|
|
|
}
|
|
|
|
|
2009-09-09 07:36:43 +08:00
|
|
|
// Reject -Z* at the top level, these options should never have been exposed
|
|
|
|
// by gcc.
|
2009-03-27 00:12:09 +08:00
|
|
|
if (Arg *A = Args.getLastArg(options::OPT_Z_Joined))
|
2009-03-20 14:14:23 +08:00
|
|
|
Diag(clang::diag::err_drv_use_of_Z_option) << A->getAsString(Args);
|
2009-03-13 07:55:14 +08:00
|
|
|
|
2013-08-13 07:26:25 +08:00
|
|
|
// Diagnose misuse of /Fo.
|
|
|
|
if (Arg *A = Args.getLastArg(options::OPT__SLASH_Fo)) {
|
|
|
|
StringRef V = A->getValue();
|
2014-11-18 03:16:36 +08:00
|
|
|
if (Inputs.size() > 1 && !V.empty() &&
|
|
|
|
!llvm::sys::path::is_separator(V.back())) {
|
2013-08-13 07:26:25 +08:00
|
|
|
// Check whether /Fo tries to name an output file for multiple inputs.
|
2013-10-19 06:49:04 +08:00
|
|
|
Diag(clang::diag::err_drv_out_file_argument_with_multiple_sources)
|
2015-06-26 23:47:46 +08:00
|
|
|
<< A->getSpelling() << V;
|
2013-08-13 07:26:25 +08:00
|
|
|
Args.eraseArg(options::OPT__SLASH_Fo);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2013-10-19 06:49:04 +08:00
|
|
|
// Diagnose misuse of /Fa.
|
|
|
|
if (Arg *A = Args.getLastArg(options::OPT__SLASH_Fa)) {
|
|
|
|
StringRef V = A->getValue();
|
2014-11-18 03:16:36 +08:00
|
|
|
if (Inputs.size() > 1 && !V.empty() &&
|
|
|
|
!llvm::sys::path::is_separator(V.back())) {
|
2013-10-19 06:49:04 +08:00
|
|
|
// Check whether /Fa tries to name an asm file for multiple inputs.
|
|
|
|
Diag(clang::diag::err_drv_out_file_argument_with_multiple_sources)
|
2015-06-26 23:47:46 +08:00
|
|
|
<< A->getSpelling() << V;
|
2013-10-19 06:49:04 +08:00
|
|
|
Args.eraseArg(options::OPT__SLASH_Fa);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-09-12 02:16:21 +08:00
|
|
|
// Diagnose misuse of /o.
|
|
|
|
if (Arg *A = Args.getLastArg(options::OPT__SLASH_o)) {
|
|
|
|
if (A->getValue()[0] == '\0') {
|
|
|
|
// It has to have a value.
|
|
|
|
Diag(clang::diag::err_drv_missing_argument) << A->getSpelling() << 1;
|
|
|
|
Args.eraseArg(options::OPT__SLASH_o);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
clang-cl: Implement initial limited support for precompiled headers.
In the gcc precompiled header model, one explicitly runs clang with `-x
c++-header` on a .h file to produce a gch file, and then includes the header
with `-include foo.h` and if a .gch file exists for that header it gets used.
This is documented at
http://clang.llvm.org/docs/UsersManual.html#precompiled-headers
cl.exe's model is fairly different, and controlled by the two flags /Yc and
/Yu. A pch file is generated as a side effect of a regular compilation when
/Ycheader.h is passed. While the compilation is running, the compiler keeps
track of #include lines in the main translation unit and writes everything up
to an `#include "header.h"` line into a pch file. Conversely, /Yuheader.h tells
the compiler to skip all code in the main TU up to and including `#include
"header.h"` and instead load header.pch. (It's also possible to use /Yc and /Yu
without an argument, in that case a `#pragma hdrstop` takes the role of
controlling the point where pch ends and real code begins.)
This patch implements limited support for this in that it requires the pch
header to be passed as a /FI force include flag – with this restriction,
it can be implemented almost completely in the driver with fairly small amounts
of code. For /Yu, this is trivial, and for /Yc a separate pch action is added
that runs before the actual compilation. After r261774, the first failing
command makes a compilation stop – this means if the pch fails to build the
main compilation won't run, which is what we want. However, in /fallback builds
we need to run the main compilation even if the pch build fails so that the
main compilation's fallback can run. To achieve this, add a ForceSuccessCommand
that pretends that the pch build always succeeded in /fallback builds (the main
compilation will then fail to open the pch and run the fallback cl.exe
invocation).
If /Yc /Yu are used in a setup that clang-cl doesn't implement yet, clang-cl
will now emit a "not implemented yet; flag ignored" warning that can be
disabled using -Wno-clang-cl-pch.
Since clang-cl doesn't yet serialize some important things (most notably
`pragma comment(lib, ...)`), this feature is disabled by default and only
enabled by an internal driver flag. Once it's more stable, this internal flag
will disappear.
(The default stdafx.h setup passes stdafx.h as explicit argument to /Yc but not
as /FI – instead every single TU has to `#include <stdafx.h>` as first thing it
does. Implementing support for this should be possible with the approach in
this patch with minimal frontend changes by passing a --stop-at / --start-at
flag from the driver to the frontend. This is left for a follow-up. I don't
think we ever want to support `#pragma hdrstop`, and supporting it with this
approach isn't easy: This approach relies on the driver knowing the pch
filename in advance, and `#pragma hdrstop(out.pch)` can set the output
filename, so the driver can't know about it in advance.)
clang-cl now also honors /Fp and puts pch files in the same spot that cl.exe
would put them, but the pch file format is of course incompatible. This has
ramifications on /fallback, so /Yc /Yu aren't passed through to cl.exe in
/fallback builds.
http://reviews.llvm.org/D17695
llvm-svn: 262420
2016-03-02 07:16:44 +08:00
|
|
|
// Diagnose unsupported forms of /Yc /Yu. Ignore /Yc/Yu for now if:
|
|
|
|
// * no filename after it
|
|
|
|
// * both /Yc and /Yu passed but with different filenames
|
|
|
|
// * corresponding file not also passed as /FI
|
|
|
|
Arg *YcArg = Args.getLastArg(options::OPT__SLASH_Yc);
|
|
|
|
Arg *YuArg = Args.getLastArg(options::OPT__SLASH_Yu);
|
|
|
|
if (YcArg && YcArg->getValue()[0] == '\0') {
|
|
|
|
Diag(clang::diag::warn_drv_ycyu_no_arg_clang_cl) << YcArg->getSpelling();
|
|
|
|
Args.eraseArg(options::OPT__SLASH_Yc);
|
|
|
|
YcArg = nullptr;
|
|
|
|
}
|
|
|
|
if (YuArg && YuArg->getValue()[0] == '\0') {
|
|
|
|
Diag(clang::diag::warn_drv_ycyu_no_arg_clang_cl) << YuArg->getSpelling();
|
|
|
|
Args.eraseArg(options::OPT__SLASH_Yu);
|
|
|
|
YuArg = nullptr;
|
|
|
|
}
|
|
|
|
if (YcArg && YuArg && strcmp(YcArg->getValue(), YuArg->getValue()) != 0) {
|
|
|
|
Diag(clang::diag::warn_drv_ycyu_different_arg_clang_cl);
|
|
|
|
Args.eraseArg(options::OPT__SLASH_Yc);
|
|
|
|
Args.eraseArg(options::OPT__SLASH_Yu);
|
|
|
|
YcArg = YuArg = nullptr;
|
|
|
|
}
|
|
|
|
if (YcArg || YuArg) {
|
|
|
|
StringRef Val = YcArg ? YcArg->getValue() : YuArg->getValue();
|
|
|
|
bool FoundMatchingInclude = false;
|
|
|
|
for (const Arg *Inc : Args.filtered(options::OPT_include)) {
|
|
|
|
// FIXME: Do case-insensitive matching and consider / and \ as equal.
|
|
|
|
if (Inc->getValue() == Val)
|
|
|
|
FoundMatchingInclude = true;
|
|
|
|
}
|
|
|
|
if (!FoundMatchingInclude) {
|
|
|
|
Diag(clang::diag::warn_drv_ycyu_no_fi_arg_clang_cl)
|
|
|
|
<< (YcArg ? YcArg : YuArg)->getSpelling();
|
|
|
|
Args.eraseArg(options::OPT__SLASH_Yc);
|
|
|
|
Args.eraseArg(options::OPT__SLASH_Yu);
|
|
|
|
YcArg = YuArg = nullptr;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (YcArg && Inputs.size() > 1) {
|
|
|
|
Diag(clang::diag::warn_drv_yc_multiple_inputs_clang_cl);
|
|
|
|
Args.eraseArg(options::OPT__SLASH_Yc);
|
|
|
|
YcArg = nullptr;
|
|
|
|
}
|
|
|
|
if (Args.hasArg(options::OPT__SLASH_Y_)) {
|
|
|
|
// /Y- disables all pch handling. Rather than check for it everywhere,
|
|
|
|
// just remove clang-cl pch-related flags here.
|
|
|
|
Args.eraseArg(options::OPT__SLASH_Fp);
|
|
|
|
Args.eraseArg(options::OPT__SLASH_Yc);
|
|
|
|
Args.eraseArg(options::OPT__SLASH_Yu);
|
|
|
|
YcArg = YuArg = nullptr;
|
|
|
|
}
|
|
|
|
|
[CUDA][OpenMP] Add a generic offload action builder
Summary:
This patch proposes a new class to generate and record action dependences related with offloading. The builder provides three main functionalities:
- Add device dependences to host actions.
- Add host dependence to device actions.
- Register device top-level actions.
The constructor of the builder detects the programming models that should be supported and generates a specialized builder for each. If a new programming model is to be added in the future, only a new specialized builder has to be implemented.
When the specialized builder is generated, it produces programming-model-specific diagnostics.
A CUDA specialized builder is proposed in the patch that mostly consists of the partition of the current `buildCudaAction` by the three different functionalities.
Reviewers: tra, echristo, ABataev, jlebar, hfinkel
Subscribers: Hahnfeld, whchung, guansong, jlebar, mehdi_amini, andreybokhanko, tcramer, mkuron, cfe-commits, arpith-jacob, carlo.bertolli, caomhin
Differential Revision: https://reviews.llvm.org/D18172
llvm-svn: 282865
2016-09-30 23:34:19 +08:00
|
|
|
// Builder to be used to build offloading actions.
|
|
|
|
OffloadingActionBuilder OffloadBuilder(C, Args, Inputs);
|
[CUDA][OpenMP] Create generic offload action
Summary:
This patch replaces the CUDA-specific action with a generic offload action. The offload action may have multiple dependences, classified as “host” and “device”. The way this generic offloading action is used is very similar to what is done today by the CUDA implementation: it is used to set a specific toolchain and architecture for its dependences during the generation of jobs.
This patch also proposes propagating the offloading information through the action graph so that this information can be easily retrieved at any time during the generation of commands. This allows e.g. the “clang tool” to evaluate whether CUDA should be supported for the device or host, and ptxas to easily retrieve the target architecture.
This is an example of what the action graph would look like (compilation of a single CUDA file with two GPU architectures):
```
0: input, "cudatests.cu", cuda, (host-cuda)
1: preprocessor, {0}, cuda-cpp-output, (host-cuda)
2: compiler, {1}, ir, (host-cuda)
3: input, "cudatests.cu", cuda, (device-cuda, sm_35)
4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_35)
5: compiler, {4}, ir, (device-cuda, sm_35)
6: backend, {5}, assembler, (device-cuda, sm_35)
7: assembler, {6}, object, (device-cuda, sm_35)
8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {7}, object
9: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {6}, assembler
10: input, "cudatests.cu", cuda, (device-cuda, sm_37)
11: preprocessor, {10}, cuda-cpp-output, (device-cuda, sm_37)
12: compiler, {11}, ir, (device-cuda, sm_37)
13: backend, {12}, assembler, (device-cuda, sm_37)
14: assembler, {13}, object, (device-cuda, sm_37)
15: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {14}, object
16: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {13}, assembler
17: linker, {8, 9, 15, 16}, cuda-fatbin, (device-cuda)
18: offload, "host-cuda (powerpc64le-unknown-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda)" {17}, ir
19: backend, {18}, assembler
20: assembler, {19}, object
21: input, "cuda", object
22: input, "cudart", object
23: linker, {20, 21, 22}, image
```
The changes in this patch pass the existent regression tests (keeps the existent functionality) and resulting binaries execute correctly in a Power8+K40 machine.
Reviewers: echristo, hfinkel, jlebar, ABataev, tra
Subscribers: guansong, andreybokhanko, tcramer, mkuron, cfe-commits, arpith-jacob, carlo.bertolli, caomhin
Differential Revision: https://reviews.llvm.org/D18171
llvm-svn: 275645
2016-07-16 07:13:27 +08:00
|
|
|
|
2009-03-13 19:38:42 +08:00
|
|
|
// Construct the actions to perform.
|
|
|
|
ActionList LinkerInputs;
|
2013-11-27 13:22:15 +08:00
|
|
|
|
2013-03-07 20:32:26 +08:00
|
|
|
llvm::SmallVector<phases::ID, phases::MaxNumberOfPhases> PL;
|
2015-07-21 04:02:54 +08:00
|
|
|
for (auto &I : Inputs) {
|
|
|
|
types::ID InputType = I.first;
|
|
|
|
const Arg *InputArg = I.second;
|
2009-03-13 19:38:42 +08:00
|
|
|
|
2013-03-07 20:32:26 +08:00
|
|
|
PL.clear();
|
|
|
|
types::getCompilationPhases(InputType, PL);
|
2009-03-13 19:38:42 +08:00
|
|
|
|
2009-09-09 07:36:43 +08:00
|
|
|
// If the first step comes after the final phase we are doing as part of
|
|
|
|
// this compilation, warn the user about it.
|
2013-03-07 20:32:26 +08:00
|
|
|
phases::ID InitialPhase = PL[0];
|
2009-03-13 19:38:42 +08:00
|
|
|
if (InitialPhase > FinalPhase) {
|
2009-03-19 15:57:08 +08:00
|
|
|
// Claim here to avoid the more general unused warning.
|
|
|
|
InputArg->claim();
|
2009-09-17 12:13:26 +08:00
|
|
|
|
2011-04-20 23:44:48 +08:00
|
|
|
// Suppress all unused style warnings with -Qunused-arguments
|
|
|
|
if (Args.hasArg(options::OPT_Qunused_arguments))
|
|
|
|
continue;
|
|
|
|
|
2012-08-06 12:09:06 +08:00
|
|
|
// Special case when final phase determined by binary name, rather than
|
|
|
|
// by a command-line argument with a corresponding Arg.
|
2013-07-19 04:29:38 +08:00
|
|
|
if (CCCIsCPP())
|
2012-08-06 12:09:06 +08:00
|
|
|
Diag(clang::diag::warn_drv_input_file_unused_by_cpp)
|
2015-06-26 23:47:46 +08:00
|
|
|
<< InputArg->getAsString(Args) << getPhaseName(InitialPhase);
|
2009-09-17 12:13:26 +08:00
|
|
|
// Special case '-E' warning on a previously preprocessed file to make
|
|
|
|
// more sense.
|
2012-08-06 12:09:06 +08:00
|
|
|
else if (InitialPhase == phases::Compile &&
|
|
|
|
FinalPhase == phases::Preprocess &&
|
|
|
|
getPreprocessedType(InputType) == types::TY_INVALID)
|
2009-09-17 12:13:26 +08:00
|
|
|
Diag(clang::diag::warn_drv_preprocessed_input_file_unused)
|
2015-06-26 23:47:46 +08:00
|
|
|
<< InputArg->getAsString(Args) << !!FinalPhaseArg
|
|
|
|
<< (FinalPhaseArg ? FinalPhaseArg->getOption().getName() : "");
|
2009-09-17 12:13:26 +08:00
|
|
|
else
|
|
|
|
Diag(clang::diag::warn_drv_input_file_unused)
|
2015-06-26 23:47:46 +08:00
|
|
|
<< InputArg->getAsString(Args) << getPhaseName(InitialPhase)
|
|
|
|
<< !!FinalPhaseArg
|
|
|
|
<< (FinalPhaseArg ? FinalPhaseArg->getOption().getName() : "");
|
2009-03-13 19:38:42 +08:00
|
|
|
continue;
|
|
|
|
}
|
2009-09-09 07:36:43 +08:00
|
|
|
|
2016-04-22 03:59:10 +08:00
|
|
|
if (YcArg) {
|
|
|
|
// Add a separate precompile phase for the compile phase.
|
|
|
|
if (FinalPhase >= phases::Compile) {
|
2016-08-31 02:55:16 +08:00
|
|
|
const types::ID HeaderType = lookupHeaderTypeForSourceType(InputType);
|
2016-04-22 03:59:10 +08:00
|
|
|
llvm::SmallVector<phases::ID, phases::MaxNumberOfPhases> PCHPL;
|
2016-08-31 02:55:16 +08:00
|
|
|
types::getCompilationPhases(HeaderType, PCHPL);
|
2016-04-22 03:59:10 +08:00
|
|
|
Arg *PchInputArg = MakeInputArg(Args, Opts, YcArg->getValue());
|
|
|
|
|
|
|
|
// Build the pipeline for the pch file.
|
2016-08-31 02:55:16 +08:00
|
|
|
Action *ClangClPch =
|
|
|
|
C.MakeAction<InputAction>(*PchInputArg, HeaderType);
|
2016-04-22 03:59:10 +08:00
|
|
|
for (phases::ID Phase : PCHPL)
|
|
|
|
ClangClPch = ConstructPhaseAction(C, Args, Phase, ClangClPch);
|
|
|
|
assert(ClangClPch);
|
|
|
|
Actions.push_back(ClangClPch);
|
|
|
|
// The driver currently exits after the first failed command. This
|
|
|
|
// relies on that behavior, to make sure if the pch generation fails,
|
|
|
|
// the main compilation won't run.
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2009-03-13 19:38:42 +08:00
|
|
|
// Build the pipeline for this file.
|
2016-01-12 07:07:27 +08:00
|
|
|
Action *Current = C.MakeAction<InputAction>(*InputArg, InputType);
|
[CUDA][OpenMP] Add a generic offload action builder
Summary:
This patch proposes a new class to generate and record action dependences related with offloading. The builder provides three main functionalities:
- Add device dependences to host actions.
- Add host dependence to device actions.
- Register device top-level actions.
The constructor of the builder detects the programming models that should be supported and generates a specialized builder for each. If a new programming model is to be added in the future, only a new specialized builder has to be implemented.
When the specialized builder is generated, it produces programming-model-specific diagnostics.
A CUDA specialized builder is proposed in the patch that mostly consists of the partition of the current `buildCudaAction` by the three different functionalities.
Reviewers: tra, echristo, ABataev, jlebar, hfinkel
Subscribers: Hahnfeld, whchung, guansong, jlebar, mehdi_amini, andreybokhanko, tcramer, mkuron, cfe-commits, arpith-jacob, carlo.bertolli, caomhin
Differential Revision: https://reviews.llvm.org/D18172
llvm-svn: 282865
2016-09-30 23:34:19 +08:00
|
|
|
|
|
|
|
// Use the current host action in any of the offloading actions, if
|
|
|
|
// required.
|
|
|
|
if (OffloadBuilder.addHostDependenceToDeviceActions(Current, InputArg))
|
|
|
|
break;
|
|
|
|
|
2015-06-26 23:47:46 +08:00
|
|
|
for (SmallVectorImpl<phases::ID>::iterator i = PL.begin(), e = PL.end();
|
|
|
|
i != e; ++i) {
|
2013-03-07 20:32:26 +08:00
|
|
|
phases::ID Phase = *i;
|
2009-03-13 19:38:42 +08:00
|
|
|
|
|
|
|
// We are done if this step is past what the user requested.
|
|
|
|
if (Phase > FinalPhase)
|
|
|
|
break;
|
|
|
|
|
[CUDA][OpenMP] Add a generic offload action builder
Summary:
This patch proposes a new class to generate and record action dependences related with offloading. The builder provides three main functionalities:
- Add device dependences to host actions.
- Add host dependence to device actions.
- Register device top-level actions.
The constructor of the builder detects the programming models that should be supported and generates a specialized builder for each. If a new programming model is to be added in the future, only a new specialized builder has to be implemented.
When the specialized builder is generated, it produces programming-model-specific diagnostics.
A CUDA specialized builder is proposed in the patch that mostly consists of the partition of the current `buildCudaAction` by the three different functionalities.
Reviewers: tra, echristo, ABataev, jlebar, hfinkel
Subscribers: Hahnfeld, whchung, guansong, jlebar, mehdi_amini, andreybokhanko, tcramer, mkuron, cfe-commits, arpith-jacob, carlo.bertolli, caomhin
Differential Revision: https://reviews.llvm.org/D18172
llvm-svn: 282865
2016-09-30 23:34:19 +08:00
|
|
|
// Add any offload action the host action depends on.
|
|
|
|
Current = OffloadBuilder.addDeviceDependencesToHostAction(
|
|
|
|
Current, InputArg, Phase, FinalPhase, PL);
|
|
|
|
if (!Current)
|
|
|
|
break;
|
|
|
|
|
2009-03-13 19:38:42 +08:00
|
|
|
// Queue linker inputs.
|
|
|
|
if (Phase == phases::Link) {
|
2013-03-07 20:32:26 +08:00
|
|
|
assert((i + 1) == e && "linking must be final compilation step.");
|
2016-01-12 07:07:27 +08:00
|
|
|
LinkerInputs.push_back(Current);
|
|
|
|
Current = nullptr;
|
2009-03-13 19:38:42 +08:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Otherwise construct the appropriate action.
|
[CUDA][OpenMP] Add a generic offload action builder
Summary:
This patch proposes a new class to generate and record action dependences related with offloading. The builder provides three main functionalities:
- Add device dependences to host actions.
- Add host dependence to device actions.
- Register device top-level actions.
The constructor of the builder detects the programming models that should be supported and generates a specialized builder for each. If a new programming model is to be added in the future, only a new specialized builder has to be implemented.
When the specialized builder is generated, it produces programming-model-specific diagnostics.
A CUDA specialized builder is proposed in the patch that mostly consists of the partition of the current `buildCudaAction` by the three different functionalities.
Reviewers: tra, echristo, ABataev, jlebar, hfinkel
Subscribers: Hahnfeld, whchung, guansong, jlebar, mehdi_amini, andreybokhanko, tcramer, mkuron, cfe-commits, arpith-jacob, carlo.bertolli, caomhin
Differential Revision: https://reviews.llvm.org/D18172
llvm-svn: 282865
2016-09-30 23:34:19 +08:00
|
|
|
auto *NewCurrent = ConstructPhaseAction(C, Args, Phase, Current);
|
2015-07-14 07:27:56 +08:00
|
|
|
|
[CUDA][OpenMP] Add a generic offload action builder
Summary:
This patch proposes a new class to generate and record action dependences related with offloading. The builder provides three main functionalities:
- Add device dependences to host actions.
- Add host dependence to device actions.
- Register device top-level actions.
The constructor of the builder detects the programming models that should be supported and generates a specialized builder for each. If a new programming model is to be added in the future, only a new specialized builder has to be implemented.
When the specialized builder is generated, it produces programming-model-specific diagnostics.
A CUDA specialized builder is proposed in the patch that mostly consists of the partition of the current `buildCudaAction` by the three different functionalities.
Reviewers: tra, echristo, ABataev, jlebar, hfinkel
Subscribers: Hahnfeld, whchung, guansong, jlebar, mehdi_amini, andreybokhanko, tcramer, mkuron, cfe-commits, arpith-jacob, carlo.bertolli, caomhin
Differential Revision: https://reviews.llvm.org/D18172
llvm-svn: 282865
2016-09-30 23:34:19 +08:00
|
|
|
// We didn't create a new action, so we will just move to the next phase.
|
|
|
|
if (NewCurrent == Current)
|
|
|
|
continue;
|
[CUDA][OpenMP] Create generic offload action
Summary:
This patch replaces the CUDA-specific action with a generic offload action. The offload action may have multiple dependences, classified as “host” and “device”. The way this generic offloading action is used is very similar to what is done today by the CUDA implementation: it is used to set a specific toolchain and architecture for its dependences during the generation of jobs.
This patch also proposes propagating the offloading information through the action graph so that this information can be easily retrieved at any time during the generation of commands. This allows e.g. the “clang tool” to evaluate whether CUDA should be supported for the device or host, and ptxas to easily retrieve the target architecture.
This is an example of what the action graph would look like (compilation of a single CUDA file with two GPU architectures):
```
0: input, "cudatests.cu", cuda, (host-cuda)
1: preprocessor, {0}, cuda-cpp-output, (host-cuda)
2: compiler, {1}, ir, (host-cuda)
3: input, "cudatests.cu", cuda, (device-cuda, sm_35)
4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_35)
5: compiler, {4}, ir, (device-cuda, sm_35)
6: backend, {5}, assembler, (device-cuda, sm_35)
7: assembler, {6}, object, (device-cuda, sm_35)
8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {7}, object
9: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {6}, assembler
10: input, "cudatests.cu", cuda, (device-cuda, sm_37)
11: preprocessor, {10}, cuda-cpp-output, (device-cuda, sm_37)
12: compiler, {11}, ir, (device-cuda, sm_37)
13: backend, {12}, assembler, (device-cuda, sm_37)
14: assembler, {13}, object, (device-cuda, sm_37)
15: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {14}, object
16: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {13}, assembler
17: linker, {8, 9, 15, 16}, cuda-fatbin, (device-cuda)
18: offload, "host-cuda (powerpc64le-unknown-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda)" {17}, ir
19: backend, {18}, assembler
20: assembler, {19}, object
21: input, "cuda", object
22: input, "cudart", object
23: linker, {20, 21, 22}, image
```
The changes in this patch pass the existent regression tests (keeps the existent functionality) and resulting binaries execute correctly in a Power8+K40 machine.
Reviewers: echristo, hfinkel, jlebar, ABataev, tra
Subscribers: guansong, andreybokhanko, tcramer, mkuron, cfe-commits, arpith-jacob, carlo.bertolli, caomhin
Differential Revision: https://reviews.llvm.org/D18171
llvm-svn: 275645
2016-07-16 07:13:27 +08:00
|
|
|
|
[CUDA][OpenMP] Add a generic offload action builder
Summary:
This patch proposes a new class to generate and record action dependences related with offloading. The builder provides three main functionalities:
- Add device dependences to host actions.
- Add host dependence to device actions.
- Register device top-level actions.
The constructor of the builder detects the programming models that should be supported and generates a specialized builder for each. If a new programming model is to be added in the future, only a new specialized builder has to be implemented.
When the specialized builder is generated, it produces programming-model-specific diagnostics.
A CUDA specialized builder is proposed in the patch that mostly consists of the partition of the current `buildCudaAction` by the three different functionalities.
Reviewers: tra, echristo, ABataev, jlebar, hfinkel
Subscribers: Hahnfeld, whchung, guansong, jlebar, mehdi_amini, andreybokhanko, tcramer, mkuron, cfe-commits, arpith-jacob, carlo.bertolli, caomhin
Differential Revision: https://reviews.llvm.org/D18172
llvm-svn: 282865
2016-09-30 23:34:19 +08:00
|
|
|
Current = NewCurrent;
|
|
|
|
|
|
|
|
// Use the current host action in any of the offloading actions, if
|
|
|
|
// required.
|
|
|
|
if (OffloadBuilder.addHostDependenceToDeviceActions(Current, InputArg))
|
|
|
|
break;
|
2015-07-14 07:27:56 +08:00
|
|
|
|
2009-03-13 19:38:42 +08:00
|
|
|
if (Current->getType() == types::TY_Nothing)
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
[CUDA][OpenMP] Add a generic offload action builder
Summary:
This patch proposes a new class to generate and record action dependences related with offloading. The builder provides three main functionalities:
- Add device dependences to host actions.
- Add host dependence to device actions.
- Register device top-level actions.
The constructor of the builder detects the programming models that should be supported and generates a specialized builder for each. If a new programming model is to be added in the future, only a new specialized builder has to be implemented.
When the specialized builder is generated, it produces programming-model-specific diagnostics.
A CUDA specialized builder is proposed in the patch that mostly consists of the partition of the current `buildCudaAction` by the three different functionalities.
Reviewers: tra, echristo, ABataev, jlebar, hfinkel
Subscribers: Hahnfeld, whchung, guansong, jlebar, mehdi_amini, andreybokhanko, tcramer, mkuron, cfe-commits, arpith-jacob, carlo.bertolli, caomhin
Differential Revision: https://reviews.llvm.org/D18172
llvm-svn: 282865
2016-09-30 23:34:19 +08:00
|
|
|
// If we ended with something, add to the output list.
|
|
|
|
if (Current)
|
2016-01-12 07:07:27 +08:00
|
|
|
Actions.push_back(Current);
|
[CUDA][OpenMP] Add a generic offload action builder
Summary:
This patch proposes a new class to generate and record action dependences related with offloading. The builder provides three main functionalities:
- Add device dependences to host actions.
- Add host dependence to device actions.
- Register device top-level actions.
The constructor of the builder detects the programming models that should be supported and generates a specialized builder for each. If a new programming model is to be added in the future, only a new specialized builder has to be implemented.
When the specialized builder is generated, it produces programming-model-specific diagnostics.
A CUDA specialized builder is proposed in the patch that mostly consists of the partition of the current `buildCudaAction` by the three different functionalities.
Reviewers: tra, echristo, ABataev, jlebar, hfinkel
Subscribers: Hahnfeld, whchung, guansong, jlebar, mehdi_amini, andreybokhanko, tcramer, mkuron, cfe-commits, arpith-jacob, carlo.bertolli, caomhin
Differential Revision: https://reviews.llvm.org/D18172
llvm-svn: 282865
2016-09-30 23:34:19 +08:00
|
|
|
|
|
|
|
// Add any top level actions generated for offloading.
|
|
|
|
OffloadBuilder.appendTopLevelActions(Actions, Current, InputArg);
|
2009-03-12 15:58:46 +08:00
|
|
|
}
|
2009-03-13 19:38:42 +08:00
|
|
|
|
[CUDA][OpenMP] Add a generic offload action builder
Summary:
This patch proposes a new class to generate and record action dependences related with offloading. The builder provides three main functionalities:
- Add device dependences to host actions.
- Add host dependence to device actions.
- Register device top-level actions.
The constructor of the builder detects the programming models that should be supported and generates a specialized builder for each. If a new programming model is to be added in the future, only a new specialized builder has to be implemented.
When the specialized builder is generated, it produces programming-model-specific diagnostics.
A CUDA specialized builder is proposed in the patch that mostly consists of the partition of the current `buildCudaAction` by the three different functionalities.
Reviewers: tra, echristo, ABataev, jlebar, hfinkel
Subscribers: Hahnfeld, whchung, guansong, jlebar, mehdi_amini, andreybokhanko, tcramer, mkuron, cfe-commits, arpith-jacob, carlo.bertolli, caomhin
Differential Revision: https://reviews.llvm.org/D18172
llvm-svn: 282865
2016-09-30 23:34:19 +08:00
|
|
|
// Add a link action if necessary.
|
[CUDA][OpenMP] Create generic offload action
Summary:
This patch replaces the CUDA-specific action with a generic offload action. The offload action may have multiple dependences, classified as “host” and “device”. The way this generic offloading action is used is very similar to what is done today by the CUDA implementation: it is used to set a specific toolchain and architecture for its dependences during the generation of jobs.
This patch also proposes propagating the offloading information through the action graph so that this information can be easily retrieved at any time during the generation of commands. This allows e.g. the “clang tool” to evaluate whether CUDA should be supported for the device or host, and ptxas to easily retrieve the target architecture.
This is an example of what the action graph would look like (compilation of a single CUDA file with two GPU architectures):
```
0: input, "cudatests.cu", cuda, (host-cuda)
1: preprocessor, {0}, cuda-cpp-output, (host-cuda)
2: compiler, {1}, ir, (host-cuda)
3: input, "cudatests.cu", cuda, (device-cuda, sm_35)
4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_35)
5: compiler, {4}, ir, (device-cuda, sm_35)
6: backend, {5}, assembler, (device-cuda, sm_35)
7: assembler, {6}, object, (device-cuda, sm_35)
8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {7}, object
9: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {6}, assembler
10: input, "cudatests.cu", cuda, (device-cuda, sm_37)
11: preprocessor, {10}, cuda-cpp-output, (device-cuda, sm_37)
12: compiler, {11}, ir, (device-cuda, sm_37)
13: backend, {12}, assembler, (device-cuda, sm_37)
14: assembler, {13}, object, (device-cuda, sm_37)
15: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {14}, object
16: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {13}, assembler
17: linker, {8, 9, 15, 16}, cuda-fatbin, (device-cuda)
18: offload, "host-cuda (powerpc64le-unknown-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda)" {17}, ir
19: backend, {18}, assembler
20: assembler, {19}, object
21: input, "cuda", object
22: input, "cudart", object
23: linker, {20, 21, 22}, image
```
The changes in this patch pass the existent regression tests (keeps the existent functionality) and resulting binaries execute correctly in a Power8+K40 machine.
Reviewers: echristo, hfinkel, jlebar, ABataev, tra
Subscribers: guansong, andreybokhanko, tcramer, mkuron, cfe-commits, arpith-jacob, carlo.bertolli, caomhin
Differential Revision: https://reviews.llvm.org/D18171
llvm-svn: 275645
2016-07-16 07:13:27 +08:00
|
|
|
if (!LinkerInputs.empty()) {
|
[CUDA][OpenMP] Add a generic offload action builder
Summary:
This patch proposes a new class to generate and record action dependences related with offloading. The builder provides three main functionalities:
- Add device dependences to host actions.
- Add host dependence to device actions.
- Register device top-level actions.
The constructor of the builder detects the programming models that should be supported and generates a specialized builder for each. If a new programming model is to be added in the future, only a new specialized builder has to be implemented.
When the specialized builder is generated, it produces programming-model-specific diagnostics.
A CUDA specialized builder is proposed in the patch that mostly consists of the partition of the current `buildCudaAction` by the three different functionalities.
Reviewers: tra, echristo, ABataev, jlebar, hfinkel
Subscribers: Hahnfeld, whchung, guansong, jlebar, mehdi_amini, andreybokhanko, tcramer, mkuron, cfe-commits, arpith-jacob, carlo.bertolli, caomhin
Differential Revision: https://reviews.llvm.org/D18172
llvm-svn: 282865
2016-09-30 23:34:19 +08:00
|
|
|
Action *LA = C.MakeAction<LinkJobAction>(LinkerInputs, types::TY_Image);
|
|
|
|
LA = OffloadBuilder.processHostLinkAction(LA);
|
|
|
|
Actions.push_back(LA);
|
[CUDA][OpenMP] Create generic offload action
Summary:
This patch replaces the CUDA-specific action with a generic offload action. The offload action may have multiple dependences, classified as “host” and “device”. The way this generic offloading action is used is very similar to what is done today by the CUDA implementation: it is used to set a specific toolchain and architecture for its dependences during the generation of jobs.
This patch also proposes propagating the offloading information through the action graph so that this information can be easily retrieved at any time during the generation of commands. This allows e.g. the “clang tool” to evaluate whether CUDA should be supported for the device or host, and ptxas to easily retrieve the target architecture.
This is an example of what the action graph would look like (compilation of a single CUDA file with two GPU architectures):
```
0: input, "cudatests.cu", cuda, (host-cuda)
1: preprocessor, {0}, cuda-cpp-output, (host-cuda)
2: compiler, {1}, ir, (host-cuda)
3: input, "cudatests.cu", cuda, (device-cuda, sm_35)
4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_35)
5: compiler, {4}, ir, (device-cuda, sm_35)
6: backend, {5}, assembler, (device-cuda, sm_35)
7: assembler, {6}, object, (device-cuda, sm_35)
8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {7}, object
9: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {6}, assembler
10: input, "cudatests.cu", cuda, (device-cuda, sm_37)
11: preprocessor, {10}, cuda-cpp-output, (device-cuda, sm_37)
12: compiler, {11}, ir, (device-cuda, sm_37)
13: backend, {12}, assembler, (device-cuda, sm_37)
14: assembler, {13}, object, (device-cuda, sm_37)
15: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {14}, object
16: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {13}, assembler
17: linker, {8, 9, 15, 16}, cuda-fatbin, (device-cuda)
18: offload, "host-cuda (powerpc64le-unknown-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda)" {17}, ir
19: backend, {18}, assembler
20: assembler, {19}, object
21: input, "cuda", object
22: input, "cudart", object
23: linker, {20, 21, 22}, image
```
The changes in this patch pass the existent regression tests (keeps the existent functionality) and resulting binaries execute correctly in a Power8+K40 machine.
Reviewers: echristo, hfinkel, jlebar, ABataev, tra
Subscribers: guansong, andreybokhanko, tcramer, mkuron, cfe-commits, arpith-jacob, carlo.bertolli, caomhin
Differential Revision: https://reviews.llvm.org/D18171
llvm-svn: 275645
2016-07-16 07:13:27 +08:00
|
|
|
}
|
2009-12-23 07:19:32 +08:00
|
|
|
|
|
|
|
// If we are linking, claim any options which are obviously only used for
|
|
|
|
// compilation.
|
2013-09-17 08:03:41 +08:00
|
|
|
if (FinalPhase == phases::Link && PL.size() == 1) {
|
2009-12-23 07:19:32 +08:00
|
|
|
Args.ClaimAllArgs(options::OPT_CompileOnly_Group);
|
2013-09-17 08:03:41 +08:00
|
|
|
Args.ClaimAllArgs(options::OPT_cl_compile_Group);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Claim ignored clang-cl options.
|
|
|
|
Args.ClaimAllArgs(options::OPT_cl_ignored_Group);
|
2015-07-29 05:01:30 +08:00
|
|
|
|
2016-04-19 10:27:07 +08:00
|
|
|
// Claim --cuda-host-only and --cuda-compile-host-device, which may be passed
|
|
|
|
// to non-CUDA compilations and should not trigger warnings there.
|
2015-07-29 05:01:30 +08:00
|
|
|
Args.ClaimAllArgs(options::OPT_cuda_host_only);
|
2016-04-19 10:27:07 +08:00
|
|
|
Args.ClaimAllArgs(options::OPT_cuda_compile_host_device);
|
2009-03-13 19:38:42 +08:00
|
|
|
}
|
|
|
|
|
2016-02-11 10:00:50 +08:00
|
|
|
Action *Driver::ConstructPhaseAction(Compilation &C, const ArgList &Args,
|
|
|
|
phases::ID Phase, Action *Input) const {
|
2009-03-18 09:38:48 +08:00
|
|
|
llvm::PrettyStackTraceString CrashInfo("Constructing phase actions");
|
[CUDA][OpenMP] Add a generic offload action builder
Summary:
This patch proposes a new class to generate and record action dependences related with offloading. The builder provides three main functionalities:
- Add device dependences to host actions.
- Add host dependence to device actions.
- Register device top-level actions.
The constructor of the builder detects the programming models that should be supported and generates a specialized builder for each. If a new programming model is to be added in the future, only a new specialized builder has to be implemented.
When the specialized builder is generated, it produces programming-model-specific diagnostics.
A CUDA specialized builder is proposed in the patch that mostly consists of the partition of the current `buildCudaAction` by the three different functionalities.
Reviewers: tra, echristo, ABataev, jlebar, hfinkel
Subscribers: Hahnfeld, whchung, guansong, jlebar, mehdi_amini, andreybokhanko, tcramer, mkuron, cfe-commits, arpith-jacob, carlo.bertolli, caomhin
Differential Revision: https://reviews.llvm.org/D18172
llvm-svn: 282865
2016-09-30 23:34:19 +08:00
|
|
|
|
|
|
|
// Some types skip the assembler phase (e.g., llvm-bc), but we can't
|
|
|
|
// encode this in the steps because the intermediate type depends on
|
|
|
|
// arguments. Just special case here.
|
|
|
|
if (Phase == phases::Assemble && Input->getType() != types::TY_PP_Asm)
|
|
|
|
return Input;
|
|
|
|
|
2009-03-13 19:38:42 +08:00
|
|
|
// Build the appropriate action.
|
|
|
|
switch (Phase) {
|
2015-06-26 23:47:46 +08:00
|
|
|
case phases::Link:
|
|
|
|
llvm_unreachable("link action invalid here.");
|
2009-03-13 19:38:42 +08:00
|
|
|
case phases::Preprocess: {
|
2009-03-30 14:36:42 +08:00
|
|
|
types::ID OutputTy;
|
|
|
|
// -{M, MM} alter the output type.
|
2010-12-09 05:33:40 +08:00
|
|
|
if (Args.hasArg(options::OPT_M, options::OPT_MM)) {
|
2009-03-30 14:36:42 +08:00
|
|
|
OutputTy = types::TY_Dependencies;
|
|
|
|
} else {
|
2012-06-30 06:03:56 +08:00
|
|
|
OutputTy = Input->getType();
|
|
|
|
if (!Args.hasFlag(options::OPT_frewrite_includes,
|
2014-06-24 16:01:01 +08:00
|
|
|
options::OPT_fno_rewrite_includes, false) &&
|
|
|
|
!CCGenDiagnostics)
|
2012-06-30 06:03:56 +08:00
|
|
|
OutputTy = types::getPreprocessedType(OutputTy);
|
2009-03-30 14:36:42 +08:00
|
|
|
assert(OutputTy != types::TY_INVALID &&
|
|
|
|
"Cannot preprocess this input type!");
|
|
|
|
}
|
2016-01-12 07:07:27 +08:00
|
|
|
return C.MakeAction<PreprocessJobAction>(Input, OutputTy);
|
2009-03-13 19:38:42 +08:00
|
|
|
}
|
2012-07-31 09:21:00 +08:00
|
|
|
case phases::Precompile: {
|
Unrevert r280035 now that the clang-cl bug it exposed has been fixed by
r280133. Original commit message:
C++ Modules TS: driver support for building modules.
This works as follows: we add --precompile to the existing gamut of options for
specifying how far to go when compiling an input (-E, -c, -S, etc.). This flag
specifies that an input is taken to the precompilation step and no further, and
this can be specified when building a .pcm from a module interface or when
building a .pch from a header file.
The .cppm extension (and some related extensions) are implicitly recognized as
C++ module interface files. If --precompile is /not/ specified, the file is
compiled (via a .pcm) to a .o file containing the code for the module (and then
potentially also assembled and linked, if -S, -c, etc. are not specified). We
do not yet suppress the emission of object code for other users of the module
interface, so for now this will only work if everything in the .cppm file has
vague linkage.
As with the existing support for module-map modules, prebuilt modules can be
provided as compiler inputs either via the -fmodule-file= command-line argument
or via files named ModuleName.pcm in one of the directories specified via
-fprebuilt-module-path=.
This also exposes the -fmodules-ts cc1 flag in the driver. This is still
experimental, and in particular, the concrete syntax is subject to change as
the Modules TS evolves in the C++ committee. Unlike -fmodules, this flag does
not enable support for implicitly loading module maps nor building modules via
the module cache, but those features can be turned on separately and used in
conjunction with the Modules TS support.
llvm-svn: 280134
2016-08-31 03:06:26 +08:00
|
|
|
types::ID OutputTy = getPrecompiledType(Input->getType());
|
|
|
|
assert(OutputTy != types::TY_INVALID &&
|
|
|
|
"Cannot precompile this input type!");
|
2012-07-31 09:21:00 +08:00
|
|
|
if (Args.hasArg(options::OPT_fsyntax_only)) {
|
|
|
|
// Syntax checks should not emit a PCH file
|
|
|
|
OutputTy = types::TY_Nothing;
|
|
|
|
}
|
2016-01-12 07:07:27 +08:00
|
|
|
return C.MakeAction<PrecompileJobAction>(Input, OutputTy);
|
2012-07-31 09:21:00 +08:00
|
|
|
}
|
2009-03-13 19:38:42 +08:00
|
|
|
case phases::Compile: {
|
2014-08-29 15:25:23 +08:00
|
|
|
if (Args.hasArg(options::OPT_fsyntax_only))
|
2016-01-12 07:07:27 +08:00
|
|
|
return C.MakeAction<CompileJobAction>(Input, types::TY_Nothing);
|
2014-08-29 15:25:23 +08:00
|
|
|
if (Args.hasArg(options::OPT_rewrite_objc))
|
2016-01-12 07:07:27 +08:00
|
|
|
return C.MakeAction<CompileJobAction>(Input, types::TY_RewrittenObjC);
|
2014-08-29 15:25:23 +08:00
|
|
|
if (Args.hasArg(options::OPT_rewrite_legacy_objc))
|
2016-01-12 07:07:27 +08:00
|
|
|
return C.MakeAction<CompileJobAction>(Input,
|
|
|
|
types::TY_RewrittenLegacyObjC);
|
2014-08-29 15:25:23 +08:00
|
|
|
if (Args.hasArg(options::OPT__analyze, options::OPT__analyze_auto))
|
2016-01-12 07:07:27 +08:00
|
|
|
return C.MakeAction<AnalyzeJobAction>(Input, types::TY_Plist);
|
2014-08-29 15:25:23 +08:00
|
|
|
if (Args.hasArg(options::OPT__migrate))
|
2016-01-12 07:07:27 +08:00
|
|
|
return C.MakeAction<MigrateJobAction>(Input, types::TY_Remap);
|
2014-08-29 15:25:23 +08:00
|
|
|
if (Args.hasArg(options::OPT_emit_ast))
|
2016-01-12 07:07:27 +08:00
|
|
|
return C.MakeAction<CompileJobAction>(Input, types::TY_AST);
|
2014-08-29 15:25:23 +08:00
|
|
|
if (Args.hasArg(options::OPT_module_file_info))
|
2016-01-12 07:07:27 +08:00
|
|
|
return C.MakeAction<CompileJobAction>(Input, types::TY_ModuleFile);
|
2014-08-29 15:25:23 +08:00
|
|
|
if (Args.hasArg(options::OPT_verify_pch))
|
2016-01-12 07:07:27 +08:00
|
|
|
return C.MakeAction<VerifyPCHJobAction>(Input, types::TY_Nothing);
|
|
|
|
return C.MakeAction<CompileJobAction>(Input, types::TY_LLVM_BC);
|
Reapply "Change -save-temps to emit unoptimized bitcode files."
This reapplies r224503 along with a fix for compiling Fortran by having the
clang driver invoke gcc (see r224546, where it was reverted). I have added
a testcase for that as well.
Original commit message:
It is often convenient to use -save-temps to collect the intermediate
results of a compilation, e.g., when triaging a bug report. Besides the
temporary files for preprocessed source and assembly code, this adds the
unoptimized bitcode files as well.
This adds a new BackendJobAction, which is mostly mechanical, to run after
the CompileJobAction. When not using -save-temps, the BackendJobAction is
combined into one job with the CompileJobAction, similar to the way the
integrated assembler is handled. I've implemented this entirely as a
driver change, so under the hood, it is just using -disable-llvm-optzns
to get the unoptimized bitcode.
Based in part on a patch by Steven Wu.
rdar://problem/18909437
llvm-svn: 224688
2014-12-21 15:00:00 +08:00
|
|
|
}
|
|
|
|
case phases::Backend: {
|
2015-10-16 04:35:53 +08:00
|
|
|
if (isUsingLTO()) {
|
2009-09-09 07:36:43 +08:00
|
|
|
types::ID Output =
|
2015-06-26 23:47:46 +08:00
|
|
|
Args.hasArg(options::OPT_S) ? types::TY_LTO_IR : types::TY_LTO_BC;
|
2016-01-12 07:07:27 +08:00
|
|
|
return C.MakeAction<BackendJobAction>(Input, Output);
|
2014-08-29 15:25:23 +08:00
|
|
|
}
|
|
|
|
if (Args.hasArg(options::OPT_emit_llvm)) {
|
2013-08-24 05:34:57 +08:00
|
|
|
types::ID Output =
|
2015-06-26 23:47:46 +08:00
|
|
|
Args.hasArg(options::OPT_S) ? types::TY_LLVM_IR : types::TY_LLVM_BC;
|
2016-01-12 07:07:27 +08:00
|
|
|
return C.MakeAction<BackendJobAction>(Input, Output);
|
2009-03-13 19:38:42 +08:00
|
|
|
}
|
2016-01-12 07:07:27 +08:00
|
|
|
return C.MakeAction<BackendJobAction>(Input, types::TY_PP_Asm);
|
2009-03-13 19:38:42 +08:00
|
|
|
}
|
|
|
|
case phases::Assemble:
|
2016-01-15 05:41:27 +08:00
|
|
|
return C.MakeAction<AssembleJobAction>(std::move(Input), types::TY_Object);
|
2009-03-13 19:38:42 +08:00
|
|
|
}
|
|
|
|
|
2011-09-23 13:06:16 +08:00
|
|
|
llvm_unreachable("invalid phase in ConstructPhaseAction");
|
2009-03-12 15:58:46 +08:00
|
|
|
}
|
|
|
|
|
2009-03-18 10:55:38 +08:00
|
|
|
void Driver::BuildJobs(Compilation &C) const {
|
2009-03-18 09:38:48 +08:00
|
|
|
llvm::PrettyStackTraceString CrashInfo("Building compilation jobs");
|
2009-03-16 14:56:51 +08:00
|
|
|
|
|
|
|
Arg *FinalOutput = C.getArgs().getLastArg(options::OPT_o);
|
|
|
|
|
2009-09-09 07:36:43 +08:00
|
|
|
// It is an error to provide a -o option if we are making multiple output
|
|
|
|
// files.
|
2009-03-16 14:56:51 +08:00
|
|
|
if (FinalOutput) {
|
|
|
|
unsigned NumOutputs = 0;
|
2014-12-30 05:02:47 +08:00
|
|
|
for (const Action *A : C.getActions())
|
|
|
|
if (A->getType() != types::TY_Nothing)
|
2009-03-16 14:56:51 +08:00
|
|
|
++NumOutputs;
|
2009-09-09 07:36:43 +08:00
|
|
|
|
2009-03-16 14:56:51 +08:00
|
|
|
if (NumOutputs > 1) {
|
|
|
|
Diag(clang::diag::err_drv_output_argument_with_multiple_files);
|
2014-05-18 00:56:41 +08:00
|
|
|
FinalOutput = nullptr;
|
2009-03-16 14:56:51 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2013-05-01 06:01:21 +08:00
|
|
|
// Collect the list of architectures.
|
|
|
|
llvm::StringSet<> ArchNames;
|
2014-12-30 03:01:36 +08:00
|
|
|
if (C.getDefaultToolChain().getTriple().isOSBinFormatMachO())
|
|
|
|
for (const Arg *A : C.getArgs())
|
2013-05-01 06:01:21 +08:00
|
|
|
if (A->getOption().matches(options::OPT_arch))
|
|
|
|
ArchNames.insert(A->getValue());
|
2009-03-16 14:56:51 +08:00
|
|
|
|
2016-01-15 05:41:21 +08:00
|
|
|
// Set of (Action, canonical ToolChain triple) pairs we've built jobs for.
|
|
|
|
std::map<std::pair<const Action *, std::string>, InputInfo> CachedResults;
|
2014-12-30 03:01:36 +08:00
|
|
|
for (Action *A : C.getActions()) {
|
2009-09-09 07:36:43 +08:00
|
|
|
// If we are linking an image for multiple archs then the linker wants
|
|
|
|
// -arch_multiple and -final_output <final image name>. Unfortunately, this
|
|
|
|
// doesn't fit in cleanly because we have to pass this information down.
|
2009-03-16 14:56:51 +08:00
|
|
|
//
|
2009-09-09 07:36:43 +08:00
|
|
|
// FIXME: This is a hack; find a cleaner way to integrate this into the
|
|
|
|
// process.
|
2014-05-18 00:56:41 +08:00
|
|
|
const char *LinkingOutput = nullptr;
|
2009-03-27 00:12:09 +08:00
|
|
|
if (isa<LipoJobAction>(A)) {
|
2009-03-16 14:56:51 +08:00
|
|
|
if (FinalOutput)
|
2012-11-01 12:30:05 +08:00
|
|
|
LinkingOutput = FinalOutput->getValue();
|
2009-03-16 14:56:51 +08:00
|
|
|
else
|
2015-01-10 01:38:53 +08:00
|
|
|
LinkingOutput = getDefaultImageName();
|
2009-03-16 14:56:51 +08:00
|
|
|
}
|
|
|
|
|
2009-09-09 07:36:43 +08:00
|
|
|
BuildJobsForAction(C, A, &C.getDefaultToolChain(),
|
2016-10-08 05:27:26 +08:00
|
|
|
/*BoundArch*/ StringRef(),
|
2009-03-16 14:56:51 +08:00
|
|
|
/*AtTopLevel*/ true,
|
2013-05-01 06:01:21 +08:00
|
|
|
/*MultipleArchs*/ ArchNames.size() > 1,
|
[CUDA][OpenMP] Create generic offload action
Summary:
This patch replaces the CUDA specific action by a generic offload action. The offload action may have multiple dependences classier in “host” and “device”. The way this generic offloading action is used is very similar to what is done today by the CUDA implementation: it is used to set a specific toolchain and architecture to its dependences during the generation of jobs.
This patch also proposes propagating the offloading information through the action graph so that that information can be easily retrieved at any time during the generation of commands. This allows e.g. the "clang tool” to evaluate whether CUDA should be supported for the device or host and ptas to easily retrieve the target architecture.
This is an example of how the action graphs would look like (compilation of a single CUDA file with two GPU architectures)
```
0: input, "cudatests.cu", cuda, (host-cuda)
1: preprocessor, {0}, cuda-cpp-output, (host-cuda)
2: compiler, {1}, ir, (host-cuda)
3: input, "cudatests.cu", cuda, (device-cuda, sm_35)
4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_35)
5: compiler, {4}, ir, (device-cuda, sm_35)
6: backend, {5}, assembler, (device-cuda, sm_35)
7: assembler, {6}, object, (device-cuda, sm_35)
8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {7}, object
9: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {6}, assembler
10: input, "cudatests.cu", cuda, (device-cuda, sm_37)
11: preprocessor, {10}, cuda-cpp-output, (device-cuda, sm_37)
12: compiler, {11}, ir, (device-cuda, sm_37)
13: backend, {12}, assembler, (device-cuda, sm_37)
14: assembler, {13}, object, (device-cuda, sm_37)
15: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {14}, object
16: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {13}, assembler
17: linker, {8, 9, 15, 16}, cuda-fatbin, (device-cuda)
18: offload, "host-cuda (powerpc64le-unknown-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda)" {17}, ir
19: backend, {18}, assembler
20: assembler, {19}, object
21: input, "cuda", object
22: input, "cudart", object
23: linker, {20, 21, 22}, image
```
The changes in this patch pass the existent regression tests (keeps the existent functionality) and resulting binaries execute correctly in a Power8+K40 machine.
Reviewers: echristo, hfinkel, jlebar, ABataev, tra
Subscribers: guansong, andreybokhanko, tcramer, mkuron, cfe-commits, arpith-jacob, carlo.bertolli, caomhin
Differential Revision: https://reviews.llvm.org/D18171
llvm-svn: 275645
2016-07-16 07:13:27 +08:00
|
|
|
/*LinkingOutput*/ LinkingOutput, CachedResults,
|
|
|
|
/*BuildForOffloadDevice*/ false);
|
2009-03-16 14:56:51 +08:00
|
|
|
}
|
2009-03-16 14:42:30 +08:00
|
|
|
|
2009-09-09 07:36:43 +08:00
|
|
|
// If the user passed -Qunused-arguments or there were errors, don't warn
|
|
|
|
// about any unused arguments.
|
2010-11-19 05:47:07 +08:00
|
|
|
if (Diags.hasErrorOccurred() ||
|
2009-04-08 03:04:18 +08:00
|
|
|
C.getArgs().hasArg(options::OPT_Qunused_arguments))
|
2009-03-19 02:03:46 +08:00
|
|
|
return;
|
|
|
|
|
2009-03-30 06:24:54 +08:00
|
|
|
// Claim -### here.
|
2015-06-26 23:47:46 +08:00
|
|
|
(void)C.getArgs().hasArg(options::OPT__HASH_HASH_HASH);
|
2009-09-09 07:36:43 +08:00
|
|
|
|
2016-04-26 05:15:49 +08:00
|
|
|
// Claim --driver-mode, --rsp-quoting, it was handled earlier.
|
2015-06-26 23:47:46 +08:00
|
|
|
(void)C.getArgs().hasArg(options::OPT_driver_mode);
|
2016-04-26 05:15:49 +08:00
|
|
|
(void)C.getArgs().hasArg(options::OPT_rsp_quoting);
|
2013-07-19 04:29:38 +08:00
|
|
|
|
2014-12-30 03:01:36 +08:00
|
|
|
for (Arg *A : C.getArgs()) {
|
2009-03-16 14:42:30 +08:00
|
|
|
// FIXME: It would be nice to be able to send the argument to the
|
2011-09-26 07:23:43 +08:00
|
|
|
// DiagnosticsEngine, so that extra values, position, and so on could be
|
|
|
|
// printed.
|
2009-04-04 08:52:26 +08:00
|
|
|
if (!A->isClaimed()) {
|
2012-10-20 06:37:06 +08:00
|
|
|
if (A->getOption().hasFlag(options::NoArgumentUnused))
|
2009-04-08 03:04:18 +08:00
|
|
|
continue;
|
|
|
|
|
2009-09-09 07:36:43 +08:00
|
|
|
// Suppress the warning automatically if this is just a flag, and it is an
|
|
|
|
// instance of an argument we already claimed.
|
2009-04-04 08:52:26 +08:00
|
|
|
const Option &Opt = A->getOption();
|
2012-08-21 05:41:17 +08:00
|
|
|
if (Opt.getKind() == Option::FlagClass) {
|
2009-04-04 08:52:26 +08:00
|
|
|
bool DuplicateClaimed = false;
|
|
|
|
|
2015-06-09 09:57:17 +08:00
|
|
|
for (const Arg *AA : C.getArgs().filtered(&Opt)) {
|
|
|
|
if (AA->isClaimed()) {
|
2009-04-04 08:52:26 +08:00
|
|
|
DuplicateClaimed = true;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (DuplicateClaimed)
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2016-01-26 05:14:52 +08:00
|
|
|
// In clang-cl, don't mention unknown arguments here since they have
|
|
|
|
// already been warned about.
|
|
|
|
if (!IsCLMode() || !A->getOption().matches(options::OPT_UNKNOWN))
|
|
|
|
Diag(clang::diag::warn_drv_unused_argument)
|
|
|
|
<< A->getAsString(C.getArgs());
|
2009-04-04 08:52:26 +08:00
|
|
|
}
|
2009-03-16 14:42:30 +08:00
|
|
|
}
|
2009-03-14 06:12:33 +08:00
|
|
|
}
|
[Driver][CUDA][OpenMP] Reimplement tool selection in the driver.
Summary:
This creates a tool selector in the driver that replaces the existing one. The goal is to better organize the code and make the selector easier to scale, in particular in the presence of offload actions that can be collapsed.
The current implementation became more confusing when the support for offloading actions was added. This concern was expressed by Eric in http://reviews.llvm.org/D9888.
This patch does not add new testing, it preserves the existing functionality.
Reviewers: echristo, tra, jlebar, rsmith, ABataev, hfinkel
Subscribers: whchung, guansong, mkuron, mehdi_amini, cfe-commits, Hahnfeld, andreybokhanko, caomhin, arpith-jacob, carlo.bertolli
Differential Revision: https://reviews.llvm.org/D21840
llvm-svn: 285307
2016-10-28 00:29:20 +08:00
|
|
|
|
|
|
|
namespace {
|
|
|
|
/// Utility class to control the collapse of dependent actions and select the
|
|
|
|
/// tools accordingly.
|
|
|
|
class ToolSelector final {
|
|
|
|
/// The tool chain this selector refers to.
|
|
|
|
const ToolChain &TC;
|
|
|
|
|
|
|
|
/// The compilation this selector refers to.
|
|
|
|
const Compilation &C;
|
|
|
|
|
|
|
|
/// The base action this selector refers to.
|
|
|
|
const JobAction *BaseAction;
|
|
|
|
|
|
|
|
/// Set to true if the current toolchain refers to host actions.
|
|
|
|
bool IsHostSelector;
|
|
|
|
|
|
|
|
/// Set to true if save-temps and embed-bitcode functionalities are active.
|
|
|
|
bool SaveTemps;
|
|
|
|
bool EmbedBitcode;
|
|
|
|
|
|
|
|
/// Get previous dependent action or null if that does not exist. If
|
|
|
|
/// \a CanBeCollapsed is false, that action must be legal to collapse or
|
|
|
|
/// null will be returned.
|
|
|
|
const JobAction *getPrevDependentAction(const ActionList &Inputs,
|
|
|
|
ActionList &SavedOffloadAction,
|
|
|
|
bool CanBeCollapsed = true) {
|
|
|
|
// An option can be collapsed only if it has a single input.
|
|
|
|
if (Inputs.size() != 1)
|
|
|
|
return nullptr;
|
|
|
|
|
|
|
|
Action *CurAction = *Inputs.begin();
|
|
|
|
if (CanBeCollapsed &&
|
|
|
|
!CurAction->isCollapsingWithNextDependentActionLegal())
|
|
|
|
return nullptr;
|
|
|
|
|
|
|
|
// If the input action is an offload action. Look through it and save any
|
|
|
|
// offload action that can be dropped in the event of a collapse.
|
|
|
|
if (auto *OA = dyn_cast<OffloadAction>(CurAction)) {
|
|
|
|
// If the dependent action is a device action, we will attempt to collapse
|
|
|
|
// only with other device actions. Otherwise, we would do the same but
|
|
|
|
// with host actions only.
|
|
|
|
if (!IsHostSelector) {
|
|
|
|
if (OA->hasSingleDeviceDependence(/*DoNotConsiderHostActions=*/true)) {
|
|
|
|
CurAction =
|
|
|
|
OA->getSingleDeviceDependence(/*DoNotConsiderHostActions=*/true);
|
|
|
|
if (CanBeCollapsed &&
|
|
|
|
!CurAction->isCollapsingWithNextDependentActionLegal())
|
|
|
|
return nullptr;
|
|
|
|
SavedOffloadAction.push_back(OA);
|
|
|
|
return dyn_cast<JobAction>(CurAction);
|
|
|
|
}
|
|
|
|
} else if (OA->hasHostDependence()) {
|
|
|
|
CurAction = OA->getHostDependence();
|
|
|
|
if (CanBeCollapsed &&
|
|
|
|
!CurAction->isCollapsingWithNextDependentActionLegal())
|
|
|
|
return nullptr;
|
|
|
|
SavedOffloadAction.push_back(OA);
|
|
|
|
return dyn_cast<JobAction>(CurAction);
|
[CUDA][OpenMP] Create generic offload action
Summary:
This patch replaces the CUDA specific action by a generic offload action. The offload action may have multiple dependences classier in “host” and “device”. The way this generic offloading action is used is very similar to what is done today by the CUDA implementation: it is used to set a specific toolchain and architecture to its dependences during the generation of jobs.
This patch also proposes propagating the offloading information through the action graph so that that information can be easily retrieved at any time during the generation of commands. This allows e.g. the "clang tool” to evaluate whether CUDA should be supported for the device or host and ptas to easily retrieve the target architecture.
This is an example of how the action graphs would look like (compilation of a single CUDA file with two GPU architectures)
```
0: input, "cudatests.cu", cuda, (host-cuda)
1: preprocessor, {0}, cuda-cpp-output, (host-cuda)
2: compiler, {1}, ir, (host-cuda)
3: input, "cudatests.cu", cuda, (device-cuda, sm_35)
4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_35)
5: compiler, {4}, ir, (device-cuda, sm_35)
6: backend, {5}, assembler, (device-cuda, sm_35)
7: assembler, {6}, object, (device-cuda, sm_35)
8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {7}, object
9: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {6}, assembler
10: input, "cudatests.cu", cuda, (device-cuda, sm_37)
11: preprocessor, {10}, cuda-cpp-output, (device-cuda, sm_37)
12: compiler, {11}, ir, (device-cuda, sm_37)
13: backend, {12}, assembler, (device-cuda, sm_37)
14: assembler, {13}, object, (device-cuda, sm_37)
15: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {14}, object
16: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {13}, assembler
17: linker, {8, 9, 15, 16}, cuda-fatbin, (device-cuda)
18: offload, "host-cuda (powerpc64le-unknown-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda)" {17}, ir
19: backend, {18}, assembler
20: assembler, {19}, object
21: input, "cuda", object
22: input, "cudart", object
23: linker, {20, 21, 22}, image
```
The changes in this patch pass the existent regression tests (keeps the existent functionality) and resulting binaries execute correctly in a Power8+K40 machine.
Reviewers: echristo, hfinkel, jlebar, ABataev, tra
Subscribers: guansong, andreybokhanko, tcramer, mkuron, cfe-commits, arpith-jacob, carlo.bertolli, caomhin
Differential Revision: https://reviews.llvm.org/D18171
llvm-svn: 275645
2016-07-16 07:13:27 +08:00
|
|
|
}
|
[Driver][CUDA][OpenMP] Reimplement tool selection in the driver.
Summary:
This creates a tool selector in the driver that replaces the existing one. The goal is to better organize the code and make the selector easier to scale, in particular in the presence of offload actions that can be collapsed.
The current implementation became more confusing when the support for offloading actions was added. This concern was expressed by Eric in http://reviews.llvm.org/D9888.
This patch does not add new testing, it preserves the existing functionality.
Reviewers: echristo, tra, jlebar, rsmith, ABataev, hfinkel
Subscribers: whchung, guansong, mkuron, mehdi_amini, cfe-commits, Hahnfeld, andreybokhanko, caomhin, arpith-jacob, carlo.bertolli
Differential Revision: https://reviews.llvm.org/D21840
llvm-svn: 285307
2016-10-28 00:29:20 +08:00
|
|
|
return nullptr;
|
|
|
|
}
|
|
|
|
|
|
|
|
return dyn_cast<JobAction>(CurAction);
|
[CUDA][OpenMP] Create generic offload action
Summary:
This patch replaces the CUDA specific action by a generic offload action. The offload action may have multiple dependences classier in “host” and “device”. The way this generic offloading action is used is very similar to what is done today by the CUDA implementation: it is used to set a specific toolchain and architecture to its dependences during the generation of jobs.
This patch also proposes propagating the offloading information through the action graph so that that information can be easily retrieved at any time during the generation of commands. This allows e.g. the "clang tool” to evaluate whether CUDA should be supported for the device or host and ptas to easily retrieve the target architecture.
This is an example of how the action graphs would look like (compilation of a single CUDA file with two GPU architectures)
```
0: input, "cudatests.cu", cuda, (host-cuda)
1: preprocessor, {0}, cuda-cpp-output, (host-cuda)
2: compiler, {1}, ir, (host-cuda)
3: input, "cudatests.cu", cuda, (device-cuda, sm_35)
4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_35)
5: compiler, {4}, ir, (device-cuda, sm_35)
6: backend, {5}, assembler, (device-cuda, sm_35)
7: assembler, {6}, object, (device-cuda, sm_35)
8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {7}, object
9: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {6}, assembler
10: input, "cudatests.cu", cuda, (device-cuda, sm_37)
11: preprocessor, {10}, cuda-cpp-output, (device-cuda, sm_37)
12: compiler, {11}, ir, (device-cuda, sm_37)
13: backend, {12}, assembler, (device-cuda, sm_37)
14: assembler, {13}, object, (device-cuda, sm_37)
15: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {14}, object
16: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {13}, assembler
17: linker, {8, 9, 15, 16}, cuda-fatbin, (device-cuda)
18: offload, "host-cuda (powerpc64le-unknown-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda)" {17}, ir
19: backend, {18}, assembler
20: assembler, {19}, object
21: input, "cuda", object
22: input, "cudart", object
23: linker, {20, 21, 22}, image
```
The changes in this patch pass the existent regression tests (keeps the existent functionality) and resulting binaries execute correctly in a Power8+K40 machine.
Reviewers: echristo, hfinkel, jlebar, ABataev, tra
Subscribers: guansong, andreybokhanko, tcramer, mkuron, cfe-commits, arpith-jacob, carlo.bertolli, caomhin
Differential Revision: https://reviews.llvm.org/D18171
llvm-svn: 275645
2016-07-16 07:13:27 +08:00
|
|
|
}
|
[Driver][CUDA][OpenMP] Reimplement tool selection in the driver.
Summary:
This creates a tool selector in the driver that replaces the existing one. The goal is to better organize the code and make the selector easier to scale, in particular in the presence of offload actions that can be collapsed.
The current implementation became more confusing when the support for offloading actions was added. This concern was expressed by Eric in http://reviews.llvm.org/D9888.
This patch does not add new testing, it preserves the existing functionality.
Reviewers: echristo, tra, jlebar, rsmith, ABataev, hfinkel
Subscribers: whchung, guansong, mkuron, mehdi_amini, cfe-commits, Hahnfeld, andreybokhanko, caomhin, arpith-jacob, carlo.bertolli
Differential Revision: https://reviews.llvm.org/D21840
llvm-svn: 285307
2016-10-28 00:29:20 +08:00
|
|
|
|
|
|
|
/// Return true if an assemble action can be collapsed.
|
|
|
|
bool canCollapseAssembleAction() const {
|
|
|
|
return TC.useIntegratedAs() && !SaveTemps &&
|
|
|
|
!C.getArgs().hasArg(options::OPT_via_file_asm) &&
|
|
|
|
!C.getArgs().hasArg(options::OPT__SLASH_FA) &&
|
|
|
|
!C.getArgs().hasArg(options::OPT__SLASH_Fa);
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Return true if a preprocessor action can be collapsed.
|
|
|
|
bool canCollapsePreprocessorAction() const {
|
|
|
|
return !C.getArgs().hasArg(options::OPT_no_integrated_cpp) &&
|
|
|
|
!C.getArgs().hasArg(options::OPT_traditional_cpp) && !SaveTemps &&
|
|
|
|
!C.getArgs().hasArg(options::OPT_rewrite_objc);
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Struct that relates an action with the offload actions that would be
|
|
|
|
/// collapsed with it.
|
|
|
|
struct JobActionInfo final {
|
|
|
|
/// The action this info refers to.
|
|
|
|
const JobAction *JA = nullptr;
|
|
|
|
/// The offload actions we need to take care off if this action is
|
|
|
|
/// collapsed.
|
|
|
|
ActionList SavedOffloadAction;
|
|
|
|
};
|
|
|
|
|
|
|
|
/// Append collapsed offload actions from the give nnumber of elements in the
|
|
|
|
/// action info array.
|
|
|
|
static void AppendCollapsedOffloadAction(ActionList &CollapsedOffloadAction,
|
|
|
|
ArrayRef<JobActionInfo> &ActionInfo,
|
|
|
|
unsigned ElementNum) {
|
|
|
|
assert(ElementNum <= ActionInfo.size() && "Invalid number of elements.");
|
|
|
|
for (unsigned I = 0; I < ElementNum; ++I)
|
|
|
|
CollapsedOffloadAction.append(ActionInfo[I].SavedOffloadAction.begin(),
|
|
|
|
ActionInfo[I].SavedOffloadAction.end());
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Functions that attempt to perform the combining. They detect if that is
|
|
|
|
/// legal, and if so they update the inputs \a Inputs and the offload action
|
|
|
|
/// that were collapsed in \a CollapsedOffloadAction. A tool that deals with
|
|
|
|
/// the combined action is returned. If the combining is not legal or if the
|
|
|
|
/// tool does not exist, null is returned.
|
|
|
|
/// Currently three kinds of collapsing are supported:
|
|
|
|
/// - Assemble + Backend + Compile;
|
|
|
|
/// - Assemble + Backend ;
|
|
|
|
/// - Backend + Compile.
|
|
|
|
const Tool *
|
|
|
|
combineAssembleBackendCompile(ArrayRef<JobActionInfo> ActionInfo,
|
|
|
|
const ActionList *&Inputs,
|
|
|
|
ActionList &CollapsedOffloadAction) {
|
|
|
|
if (ActionInfo.size() < 3 || !canCollapseAssembleAction())
|
|
|
|
return nullptr;
|
|
|
|
auto *AJ = dyn_cast<AssembleJobAction>(ActionInfo[0].JA);
|
|
|
|
auto *BJ = dyn_cast<BackendJobAction>(ActionInfo[1].JA);
|
|
|
|
auto *CJ = dyn_cast<CompileJobAction>(ActionInfo[2].JA);
|
|
|
|
if (!AJ || !BJ || !CJ)
|
|
|
|
return nullptr;
|
|
|
|
|
|
|
|
// Get compiler tool.
|
|
|
|
const Tool *T = TC.SelectTool(*CJ);
|
|
|
|
if (!T)
|
2014-05-18 00:56:41 +08:00
|
|
|
return nullptr;
|
[Driver][CUDA][OpenMP] Reimplement tool selection in the driver.
Summary:
This creates a tool selector in the driver that replaces the existing one. The goal is to better organize the code and make the selector easier to scale, in particular in the presence of offload actions that can be collapsed.
The current implementation became more confusing when the support for offloading actions was added. This concern was expressed by Eric in http://reviews.llvm.org/D9888.
This patch does not add new testing, it preserves the existing functionality.
Reviewers: echristo, tra, jlebar, rsmith, ABataev, hfinkel
Subscribers: whchung, guansong, mkuron, mehdi_amini, cfe-commits, Hahnfeld, andreybokhanko, caomhin, arpith-jacob, carlo.bertolli
Differential Revision: https://reviews.llvm.org/D21840
llvm-svn: 285307
2016-10-28 00:29:20 +08:00
|
|
|
|
2016-03-01 09:07:58 +08:00
|
|
|
// When using -fembed-bitcode, it is required to have the same tool (clang)
|
|
|
|
// for both CompilerJA and BackendJA. Otherwise, combine two stages.
|
|
|
|
if (EmbedBitcode) {
|
[Driver][CUDA][OpenMP] Reimplement tool selection in the driver.
Summary:
This creates a tool selector in the driver that replaces the existing one. The goal is to better organize the code and make the selector easier to scale, in particular in the presence of offload actions that can be collapsed.
The current implementation became more confusing when the support for offloading actions was added. This concern was expressed by Eric in http://reviews.llvm.org/D9888.
This patch does not add new testing, it preserves the existing functionality.
Reviewers: echristo, tra, jlebar, rsmith, ABataev, hfinkel
Subscribers: whchung, guansong, mkuron, mehdi_amini, cfe-commits, Hahnfeld, andreybokhanko, caomhin, arpith-jacob, carlo.bertolli
Differential Revision: https://reviews.llvm.org/D21840
llvm-svn: 285307
2016-10-28 00:29:20 +08:00
|
|
|
const Tool *BT = TC.SelectTool(*BJ);
|
|
|
|
if (BT == T)
|
|
|
|
return nullptr;
|
Reapply "Change -save-temps to emit unoptimized bitcode files."
This reapplies r224503 along with a fix for compiling Fortran by having the
clang driver invoke gcc (see r224546, where it was reverted). I have added
a testcase for that as well.
Original commit message:
It is often convenient to use -save-temps to collect the intermediate
results of a compilation, e.g., when triaging a bug report. Besides the
temporary files for preprocessed source and assembly code, this adds the
unoptimized bitcode files as well.
This adds a new BackendJobAction, which is mostly mechanical, to run after
the CompileJobAction. When not using -save-temps, the BackendJobAction is
combined into one job with the CompileJobAction, similar to the way the
integrated assembler is handled. I've implemented this entirely as a
driver change, so under the hood, it is just using -disable-llvm-optzns
to get the unoptimized bitcode.
Based in part on a patch by Steven Wu.
rdar://problem/18909437
llvm-svn: 224688
2014-12-21 15:00:00 +08:00
|
|
|
}
|
[Driver][CUDA][OpenMP] Reimplement tool selection in the driver.
Summary:
This creates a tool selector in the driver that replaces the existing one. The goal is to better organize the code and make the selector easier to scale, in particular in the presence of offload actions that can be collapsed.
The current implementation became more confusing when the support for offloading actions was added. This concern was expressed by Eric in http://reviews.llvm.org/D9888.
This patch does not add new testing, it preserves the existing functionality.
Reviewers: echristo, tra, jlebar, rsmith, ABataev, hfinkel
Subscribers: whchung, guansong, mkuron, mehdi_amini, cfe-commits, Hahnfeld, andreybokhanko, caomhin, arpith-jacob, carlo.bertolli
Differential Revision: https://reviews.llvm.org/D21840
llvm-svn: 285307
2016-10-28 00:29:20 +08:00
|
|
|
|
|
|
|
if (!T->hasIntegratedAssembler())
|
|
|
|
return nullptr;
|
|
|
|
|
|
|
|
Inputs = &CJ->getInputs();
|
|
|
|
AppendCollapsedOffloadAction(CollapsedOffloadAction, ActionInfo,
|
|
|
|
/*NumElements=*/3);
|
|
|
|
return T;
|
Reapply "Change -save-temps to emit unoptimized bitcode files."
This reapplies r224503 along with a fix for compiling Fortran by having the
clang driver invoke gcc (see r224546, where it was reverted). I have added
a testcase for that as well.
Original commit message:
It is often convenient to use -save-temps to collect the intermediate
results of a compilation, e.g., when triaging a bug report. Besides the
temporary files for preprocessed source and assembly code, this adds the
unoptimized bitcode files as well.
This adds a new BackendJobAction, which is mostly mechanical, to run after
the CompileJobAction. When not using -save-temps, the BackendJobAction is
combined into one job with the CompileJobAction, similar to the way the
integrated assembler is handled. I've implemented this entirely as a
driver change, so under the hood, it is just using -disable-llvm-optzns
to get the unoptimized bitcode.
Based in part on a patch by Steven Wu.
rdar://problem/18909437
llvm-svn: 224688
2014-12-21 15:00:00 +08:00
|
|
|
}
|
[Driver][CUDA][OpenMP] Reimplement tool selection in the driver.
Summary:
This creates a tool selector in the driver that replaces the existing one. The goal is to better organize the code and make the selector easier to scale, in particular in the presence of offload actions that can be collapsed.
The current implementation became more confusing when the support for offloading actions was added. This concern was expressed by Eric in http://reviews.llvm.org/D9888.
This patch does not add new testing, it preserves the existing functionality.
Reviewers: echristo, tra, jlebar, rsmith, ABataev, hfinkel
Subscribers: whchung, guansong, mkuron, mehdi_amini, cfe-commits, Hahnfeld, andreybokhanko, caomhin, arpith-jacob, carlo.bertolli
Differential Revision: https://reviews.llvm.org/D21840
llvm-svn: 285307
2016-10-28 00:29:20 +08:00
|
|
|
const Tool *combineAssembleBackend(ArrayRef<JobActionInfo> ActionInfo,
|
|
|
|
const ActionList *&Inputs,
|
|
|
|
ActionList &CollapsedOffloadAction) {
|
|
|
|
if (ActionInfo.size() < 2 || !canCollapseAssembleAction())
|
|
|
|
return nullptr;
|
|
|
|
auto *AJ = dyn_cast<AssembleJobAction>(ActionInfo[0].JA);
|
|
|
|
auto *BJ = dyn_cast<BackendJobAction>(ActionInfo[1].JA);
|
|
|
|
if (!AJ || !BJ)
|
|
|
|
return nullptr;
|
Reapply "Change -save-temps to emit unoptimized bitcode files."
This reapplies r224503 along with a fix for compiling Fortran by having the
clang driver invoke gcc (see r224546, where it was reverted). I have added
a testcase for that as well.
Original commit message:
It is often convenient to use -save-temps to collect the intermediate
results of a compilation, e.g., when triaging a bug report. Besides the
temporary files for preprocessed source and assembly code, this adds the
unoptimized bitcode files as well.
This adds a new BackendJobAction, which is mostly mechanical, to run after
the CompileJobAction. When not using -save-temps, the BackendJobAction is
combined into one job with the CompileJobAction, similar to the way the
integrated assembler is handled. I've implemented this entirely as a
driver change, so under the hood, it is just using -disable-llvm-optzns
to get the unoptimized bitcode.
Based in part on a patch by Steven Wu.
rdar://problem/18909437
llvm-svn: 224688
2014-12-21 15:00:00 +08:00
|
|
|
|
[Driver][CUDA][OpenMP] Reimplement tool selection in the driver.
Summary:
This creates a tool selector in the driver that replaces the existing one. The goal is to better organize the code and make the selector easier to scale, in particular in the presence of offload actions that can be collapsed.
The current implementation became more confusing when the support for offloading actions was added. This concern was expressed by Eric in http://reviews.llvm.org/D9888.
This patch does not add new testing, it preserves the existing functionality.
Reviewers: echristo, tra, jlebar, rsmith, ABataev, hfinkel
Subscribers: whchung, guansong, mkuron, mehdi_amini, cfe-commits, Hahnfeld, andreybokhanko, caomhin, arpith-jacob, carlo.bertolli
Differential Revision: https://reviews.llvm.org/D21840
llvm-svn: 285307
2016-10-28 00:29:20 +08:00
|
|
|
// Retrieve the compile job, backend action must always be preceded by one.
|
|
|
|
ActionList CompileJobOffloadActions;
|
|
|
|
auto *CJ = getPrevDependentAction(BJ->getInputs(), CompileJobOffloadActions,
|
|
|
|
/*CanBeCollapsed=*/false);
|
|
|
|
if (!AJ || !BJ || !CJ)
|
|
|
|
return nullptr;
|
[CUDA][OpenMP] Create generic offload action
Summary:
This patch replaces the CUDA specific action by a generic offload action. The offload action may have multiple dependences classier in “host” and “device”. The way this generic offloading action is used is very similar to what is done today by the CUDA implementation: it is used to set a specific toolchain and architecture to its dependences during the generation of jobs.
This patch also proposes propagating the offloading information through the action graph so that that information can be easily retrieved at any time during the generation of commands. This allows e.g. the "clang tool” to evaluate whether CUDA should be supported for the device or host and ptas to easily retrieve the target architecture.
This is an example of how the action graphs would look like (compilation of a single CUDA file with two GPU architectures)
```
0: input, "cudatests.cu", cuda, (host-cuda)
1: preprocessor, {0}, cuda-cpp-output, (host-cuda)
2: compiler, {1}, ir, (host-cuda)
3: input, "cudatests.cu", cuda, (device-cuda, sm_35)
4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_35)
5: compiler, {4}, ir, (device-cuda, sm_35)
6: backend, {5}, assembler, (device-cuda, sm_35)
7: assembler, {6}, object, (device-cuda, sm_35)
8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {7}, object
9: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {6}, assembler
10: input, "cudatests.cu", cuda, (device-cuda, sm_37)
11: preprocessor, {10}, cuda-cpp-output, (device-cuda, sm_37)
12: compiler, {11}, ir, (device-cuda, sm_37)
13: backend, {12}, assembler, (device-cuda, sm_37)
14: assembler, {13}, object, (device-cuda, sm_37)
15: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {14}, object
16: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {13}, assembler
17: linker, {8, 9, 15, 16}, cuda-fatbin, (device-cuda)
18: offload, "host-cuda (powerpc64le-unknown-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda)" {17}, ir
19: backend, {18}, assembler
20: assembler, {19}, object
21: input, "cuda", object
22: input, "cudart", object
23: linker, {20, 21, 22}, image
```
The changes in this patch pass the existent regression tests (keeps the existent functionality) and resulting binaries execute correctly in a Power8+K40 machine.
Reviewers: echristo, hfinkel, jlebar, ABataev, tra
Subscribers: guansong, andreybokhanko, tcramer, mkuron, cfe-commits, arpith-jacob, carlo.bertolli, caomhin
Differential Revision: https://reviews.llvm.org/D18171
llvm-svn: 275645
2016-07-16 07:13:27 +08:00
|
|
|
|
[Driver][CUDA][OpenMP] Reimplement tool selection in the driver.
Summary:
This creates a tool selector in the driver that replaces the existing one. The goal is to better organize the code and make the selector easier to scale, in particular in the presence of offload actions that can be collapsed.
The current implementation became more confusing when the support for offloading actions was added. This concern was expressed by Eric in http://reviews.llvm.org/D9888.
This patch does not add new testing, it preserves the existing functionality.
Reviewers: echristo, tra, jlebar, rsmith, ABataev, hfinkel
Subscribers: whchung, guansong, mkuron, mehdi_amini, cfe-commits, Hahnfeld, andreybokhanko, caomhin, arpith-jacob, carlo.bertolli
Differential Revision: https://reviews.llvm.org/D21840
llvm-svn: 285307
2016-10-28 00:29:20 +08:00
|
|
|
assert(isa<CompileJobAction>(CJ) &&
|
|
|
|
"Expecting compile job preceding backend job.");
|
[CUDA][OpenMP] Create generic offload action
Summary:
This patch replaces the CUDA specific action by a generic offload action. The offload action may have multiple dependences classier in “host” and “device”. The way this generic offloading action is used is very similar to what is done today by the CUDA implementation: it is used to set a specific toolchain and architecture to its dependences during the generation of jobs.
This patch also proposes propagating the offloading information through the action graph so that that information can be easily retrieved at any time during the generation of commands. This allows e.g. the "clang tool” to evaluate whether CUDA should be supported for the device or host and ptas to easily retrieve the target architecture.
This is an example of how the action graphs would look like (compilation of a single CUDA file with two GPU architectures)
```
0: input, "cudatests.cu", cuda, (host-cuda)
1: preprocessor, {0}, cuda-cpp-output, (host-cuda)
2: compiler, {1}, ir, (host-cuda)
3: input, "cudatests.cu", cuda, (device-cuda, sm_35)
4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_35)
5: compiler, {4}, ir, (device-cuda, sm_35)
6: backend, {5}, assembler, (device-cuda, sm_35)
7: assembler, {6}, object, (device-cuda, sm_35)
8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {7}, object
9: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {6}, assembler
10: input, "cudatests.cu", cuda, (device-cuda, sm_37)
11: preprocessor, {10}, cuda-cpp-output, (device-cuda, sm_37)
12: compiler, {11}, ir, (device-cuda, sm_37)
13: backend, {12}, assembler, (device-cuda, sm_37)
14: assembler, {13}, object, (device-cuda, sm_37)
15: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {14}, object
16: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {13}, assembler
17: linker, {8, 9, 15, 16}, cuda-fatbin, (device-cuda)
18: offload, "host-cuda (powerpc64le-unknown-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda)" {17}, ir
19: backend, {18}, assembler
20: assembler, {19}, object
21: input, "cuda", object
22: input, "cudart", object
23: linker, {20, 21, 22}, image
```
The changes in this patch pass the existent regression tests (keeps the existent functionality) and resulting binaries execute correctly in a Power8+K40 machine.
Reviewers: echristo, hfinkel, jlebar, ABataev, tra
Subscribers: guansong, andreybokhanko, tcramer, mkuron, cfe-commits, arpith-jacob, carlo.bertolli, caomhin
Differential Revision: https://reviews.llvm.org/D18171
llvm-svn: 275645
2016-07-16 07:13:27 +08:00
|
|
|
|
[Driver][CUDA][OpenMP] Reimplement tool selection in the driver.
Summary:
This creates a tool selector in the driver that replaces the existing one. The goal is to better organize the code and make the selector easier to scale, in particular in the presence of offload actions that can be collapsed.
The current implementation became more confusing when the support for offloading actions was added. This concern was expressed by Eric in http://reviews.llvm.org/D9888.
This patch does not add new testing, it preserves the existing functionality.
Reviewers: echristo, tra, jlebar, rsmith, ABataev, hfinkel
Subscribers: whchung, guansong, mkuron, mehdi_amini, cfe-commits, Hahnfeld, andreybokhanko, caomhin, arpith-jacob, carlo.bertolli
Differential Revision: https://reviews.llvm.org/D21840
llvm-svn: 285307
2016-10-28 00:29:20 +08:00
|
|
|
// Get compiler tool.
|
|
|
|
const Tool *T = TC.SelectTool(*CJ);
|
|
|
|
if (!T)
|
Reapply "Change -save-temps to emit unoptimized bitcode files."
This reapplies r224503 along with a fix for compiling Fortran by having the
clang driver invoke gcc (see r224546, where it was reverted). I have added
a testcase for that as well.
Original commit message:
It is often convenient to use -save-temps to collect the intermediate
results of a compilation, e.g., when triaging a bug report. Besides the
temporary files for preprocessed source and assembly code, this adds the
unoptimized bitcode files as well.
This adds a new BackendJobAction, which is mostly mechanical, to run after
the CompileJobAction. When not using -save-temps, the BackendJobAction is
combined into one job with the CompileJobAction, similar to the way the
integrated assembler is handled. I've implemented this entirely as a
driver change, so under the hood, it is just using -disable-llvm-optzns
to get the unoptimized bitcode.
Based in part on a patch by Steven Wu.
rdar://problem/18909437
llvm-svn: 224688
2014-12-21 15:00:00 +08:00
|
|
|
return nullptr;
|
[CUDA][OpenMP] Create generic offload action
Summary:
This patch replaces the CUDA specific action by a generic offload action. The offload action may have multiple dependences classier in “host” and “device”. The way this generic offloading action is used is very similar to what is done today by the CUDA implementation: it is used to set a specific toolchain and architecture to its dependences during the generation of jobs.
This patch also proposes propagating the offloading information through the action graph so that that information can be easily retrieved at any time during the generation of commands. This allows e.g. the "clang tool” to evaluate whether CUDA should be supported for the device or host and ptas to easily retrieve the target architecture.
This is an example of how the action graphs would look like (compilation of a single CUDA file with two GPU architectures)
```
0: input, "cudatests.cu", cuda, (host-cuda)
1: preprocessor, {0}, cuda-cpp-output, (host-cuda)
2: compiler, {1}, ir, (host-cuda)
3: input, "cudatests.cu", cuda, (device-cuda, sm_35)
4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_35)
5: compiler, {4}, ir, (device-cuda, sm_35)
6: backend, {5}, assembler, (device-cuda, sm_35)
7: assembler, {6}, object, (device-cuda, sm_35)
8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {7}, object
9: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {6}, assembler
10: input, "cudatests.cu", cuda, (device-cuda, sm_37)
11: preprocessor, {10}, cuda-cpp-output, (device-cuda, sm_37)
12: compiler, {11}, ir, (device-cuda, sm_37)
13: backend, {12}, assembler, (device-cuda, sm_37)
14: assembler, {13}, object, (device-cuda, sm_37)
15: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {14}, object
16: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {13}, assembler
17: linker, {8, 9, 15, 16}, cuda-fatbin, (device-cuda)
18: offload, "host-cuda (powerpc64le-unknown-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda)" {17}, ir
19: backend, {18}, assembler
20: assembler, {19}, object
21: input, "cuda", object
22: input, "cudart", object
23: linker, {20, 21, 22}, image
```
The changes in this patch pass the existent regression tests (keeps the existent functionality) and resulting binaries execute correctly in a Power8+K40 machine.
Reviewers: echristo, hfinkel, jlebar, ABataev, tra
Subscribers: guansong, andreybokhanko, tcramer, mkuron, cfe-commits, arpith-jacob, carlo.bertolli, caomhin
Differential Revision: https://reviews.llvm.org/D18171
llvm-svn: 275645
2016-07-16 07:13:27 +08:00
|
|
|
|
[Driver][CUDA][OpenMP] Reimplement tool selection in the driver.
Summary:
This creates a tool selector in the driver that replaces the existing one. The goal is to better organize the code and make the selector easier to scale, in particular in the presence of offload actions that can be collapsed.
The current implementation became more confusing when the support for offloading actions was added. This concern was expressed by Eric in http://reviews.llvm.org/D9888.
This patch does not add new testing, it preserves the existing functionality.
Reviewers: echristo, tra, jlebar, rsmith, ABataev, hfinkel
Subscribers: whchung, guansong, mkuron, mehdi_amini, cfe-commits, Hahnfeld, andreybokhanko, caomhin, arpith-jacob, carlo.bertolli
Differential Revision: https://reviews.llvm.org/D21840
llvm-svn: 285307
2016-10-28 00:29:20 +08:00
|
|
|
if (!T->hasIntegratedAssembler())
|
|
|
|
return nullptr;
|
|
|
|
|
|
|
|
Inputs = &BJ->getInputs();
|
|
|
|
AppendCollapsedOffloadAction(CollapsedOffloadAction, ActionInfo,
|
|
|
|
/*NumElements=*/2);
|
|
|
|
return T;
|
2010-02-03 11:07:56 +08:00
|
|
|
}
|
[Driver][CUDA][OpenMP] Reimplement tool selection in the driver.
Summary:
This creates a tool selector in the driver that replaces the existing one. The goal is to better organize the code and make the selector easier to scale, in particular in the presence of offload actions that can be collapsed.
The current implementation became more confusing when the support for offloading actions was added. This concern was expressed by Eric in http://reviews.llvm.org/D9888.
This patch does not add new testing, it preserves the existing functionality.
Reviewers: echristo, tra, jlebar, rsmith, ABataev, hfinkel
Subscribers: whchung, guansong, mkuron, mehdi_amini, cfe-commits, Hahnfeld, andreybokhanko, caomhin, arpith-jacob, carlo.bertolli
Differential Revision: https://reviews.llvm.org/D21840
llvm-svn: 285307
2016-10-28 00:29:20 +08:00
|
|
|
/// Try to fold a backend job and the compile job that feeds it into the tool
/// selected for the compile job.
///
/// \param ActionInfo chain of job actions; the combination only applies when
///        element 0 is a backend job and element 1 is a compile job.
/// \param Inputs on success, redirected to the compile job's inputs; left
///        untouched otherwise.
/// \param CollapsedOffloadAction on success, passed to
///        AppendCollapsedOffloadAction together with the first two chain
///        elements.
/// \returns the tool selected for the compile job, or null when the
///          combination does not apply.
const Tool *combineBackendCompile(ArrayRef<JobActionInfo> ActionInfo,
                                  const ActionList *&Inputs,
                                  ActionList &CollapsedOffloadAction) {
  if (ActionInfo.size() < 2)
    return nullptr;
  // NOTE(review): this combination is gated on the *preprocessor* collapse
  // predicate - confirm that this is the intended check.
  if (!canCollapsePreprocessorAction())
    return nullptr;

  auto *Backend = dyn_cast<BackendJobAction>(ActionInfo[0].JA);
  auto *Compile = dyn_cast<CompileJobAction>(ActionInfo[1].JA);
  if (!(Backend && Compile))
    return nullptr;

  const Tool *CompilerTool = TC.SelectTool(*Compile);
  if (!CompilerTool)
    return nullptr;

  // Do not combine when the tool can emit IR and either SaveTemps or
  // EmbedBitcode is set.
  if (CompilerTool->canEmitIR()) {
    if (SaveTemps || EmbedBitcode)
      return nullptr;
  }

  Inputs = &Compile->getInputs();
  AppendCollapsedOffloadAction(CollapsedOffloadAction, ActionInfo,
                               /*NumElements=*/2);
  return CompilerTool;
}
|
[CUDA][OpenMP] Create generic offload action
Summary:
This patch replaces the CUDA-specific action with a generic offload action. The offload action may have multiple dependences, classified as “host” and “device”. The way this generic offloading action is used is very similar to what is done today by the CUDA implementation: it is used to set a specific toolchain and architecture for its dependences during the generation of jobs.
This patch also proposes propagating the offloading information through the action graph so that that information can be easily retrieved at any time during the generation of commands. This allows, e.g., the “clang tool” to evaluate whether CUDA should be supported for the device or host, and ptxas to easily retrieve the target architecture.
This is an example of what the action graph would look like (compilation of a single CUDA file with two GPU architectures):
```
0: input, "cudatests.cu", cuda, (host-cuda)
1: preprocessor, {0}, cuda-cpp-output, (host-cuda)
2: compiler, {1}, ir, (host-cuda)
3: input, "cudatests.cu", cuda, (device-cuda, sm_35)
4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_35)
5: compiler, {4}, ir, (device-cuda, sm_35)
6: backend, {5}, assembler, (device-cuda, sm_35)
7: assembler, {6}, object, (device-cuda, sm_35)
8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {7}, object
9: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {6}, assembler
10: input, "cudatests.cu", cuda, (device-cuda, sm_37)
11: preprocessor, {10}, cuda-cpp-output, (device-cuda, sm_37)
12: compiler, {11}, ir, (device-cuda, sm_37)
13: backend, {12}, assembler, (device-cuda, sm_37)
14: assembler, {13}, object, (device-cuda, sm_37)
15: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {14}, object
16: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {13}, assembler
17: linker, {8, 9, 15, 16}, cuda-fatbin, (device-cuda)
18: offload, "host-cuda (powerpc64le-unknown-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda)" {17}, ir
19: backend, {18}, assembler
20: assembler, {19}, object
21: input, "cuda", object
22: input, "cudart", object
23: linker, {20, 21, 22}, image
```
The changes in this patch pass the existing regression tests (keeping the existing functionality), and the resulting binaries execute correctly on a Power8+K40 machine.
Reviewers: echristo, hfinkel, jlebar, ABataev, tra
Subscribers: guansong, andreybokhanko, tcramer, mkuron, cfe-commits, arpith-jacob, carlo.bertolli, caomhin
Differential Revision: https://reviews.llvm.org/D18171
llvm-svn: 275645
2016-07-16 07:13:27 +08:00
|
|
|
|
[Driver][CUDA][OpenMP] Reimplement tool selection in the driver.
Summary:
This creates a tool selector in the driver that replaces the existing one. The goal is to better organize the code and make the selector easier to scale, in particular in the presence of offload actions that can be collapsed.
The current implementation became more confusing when the support for offloading actions was added. This concern was expressed by Eric in http://reviews.llvm.org/D9888.
This patch does not add new testing, it preserves the existing functionality.
Reviewers: echristo, tra, jlebar, rsmith, ABataev, hfinkel
Subscribers: whchung, guansong, mkuron, mehdi_amini, cfe-commits, Hahnfeld, andreybokhanko, caomhin, arpith-jacob, carlo.bertolli
Differential Revision: https://reviews.llvm.org/D21840
llvm-svn: 285307
2016-10-28 00:29:20 +08:00
|
|
|
/// Updates the inputs if the obtained tool supports combining with
|
|
|
|
/// preprocessor action, and the current input is indeed a preprocessor
|
|
|
|
/// action. If combining results in the collapse of offloading actions, those
|
|
|
|
/// are appended to \a CollapsedOffloadAction.
|
|
|
|
void combineWithPreprocessor(const Tool *T, const ActionList *&Inputs,
|
|
|
|
ActionList &CollapsedOffloadAction) {
|
|
|
|
if (!T || !canCollapsePreprocessorAction() || !T->hasIntegratedCPP())
|
|
|
|
return;
|
[CUDA][OpenMP] Create generic offload action
Summary:
This patch replaces the CUDA specific action by a generic offload action. The offload action may have multiple dependences classier in “host” and “device”. The way this generic offloading action is used is very similar to what is done today by the CUDA implementation: it is used to set a specific toolchain and architecture to its dependences during the generation of jobs.
This patch also proposes propagating the offloading information through the action graph so that that information can be easily retrieved at any time during the generation of commands. This allows e.g. the "clang tool” to evaluate whether CUDA should be supported for the device or host and ptas to easily retrieve the target architecture.
This is an example of how the action graphs would look like (compilation of a single CUDA file with two GPU architectures)
```
0: input, "cudatests.cu", cuda, (host-cuda)
1: preprocessor, {0}, cuda-cpp-output, (host-cuda)
2: compiler, {1}, ir, (host-cuda)
3: input, "cudatests.cu", cuda, (device-cuda, sm_35)
4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_35)
5: compiler, {4}, ir, (device-cuda, sm_35)
6: backend, {5}, assembler, (device-cuda, sm_35)
7: assembler, {6}, object, (device-cuda, sm_35)
8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {7}, object
9: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {6}, assembler
10: input, "cudatests.cu", cuda, (device-cuda, sm_37)
11: preprocessor, {10}, cuda-cpp-output, (device-cuda, sm_37)
12: compiler, {11}, ir, (device-cuda, sm_37)
13: backend, {12}, assembler, (device-cuda, sm_37)
14: assembler, {13}, object, (device-cuda, sm_37)
15: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {14}, object
16: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {13}, assembler
17: linker, {8, 9, 15, 16}, cuda-fatbin, (device-cuda)
18: offload, "host-cuda (powerpc64le-unknown-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda)" {17}, ir
19: backend, {18}, assembler
20: assembler, {19}, object
21: input, "cuda", object
22: input, "cudart", object
23: linker, {20, 21, 22}, image
```
The changes in this patch pass the existent regression tests (keeps the existent functionality) and resulting binaries execute correctly in a Power8+K40 machine.
Reviewers: echristo, hfinkel, jlebar, ABataev, tra
Subscribers: guansong, andreybokhanko, tcramer, mkuron, cfe-commits, arpith-jacob, carlo.bertolli, caomhin
Differential Revision: https://reviews.llvm.org/D18171
llvm-svn: 275645
2016-07-16 07:13:27 +08:00
|
|
|
|
[Driver][CUDA][OpenMP] Reimplement tool selection in the driver.
Summary:
This creates a tool selector in the driver that replaces the existing one. The goal is to better organize the code and make the selector easier to scale, in particular in the presence of offload actions that can be collapsed.
The current implementation became more confusing when the support for offloading actions was added. This concern was expressed by Eric in http://reviews.llvm.org/D9888.
This patch does not add new testing, it preserves the existing functionality.
Reviewers: echristo, tra, jlebar, rsmith, ABataev, hfinkel
Subscribers: whchung, guansong, mkuron, mehdi_amini, cfe-commits, Hahnfeld, andreybokhanko, caomhin, arpith-jacob, carlo.bertolli
Differential Revision: https://reviews.llvm.org/D21840
llvm-svn: 285307
2016-10-28 00:29:20 +08:00
|
|
|
// Attempt to get a preprocessor action dependence.
|
|
|
|
ActionList PreprocessJobOffloadActions;
|
|
|
|
auto *PJ = getPrevDependentAction(*Inputs, PreprocessJobOffloadActions);
|
|
|
|
if (!PJ || !isa<PreprocessJobAction>(PJ))
|
|
|
|
return;
|
|
|
|
|
|
|
|
// This is legal to combine. Append any offload action we found and set the
|
|
|
|
// current inputs to preprocessor inputs.
|
|
|
|
CollapsedOffloadAction.append(PreprocessJobOffloadActions.begin(),
|
|
|
|
PreprocessJobOffloadActions.end());
|
|
|
|
Inputs = &PJ->getInputs();
|
[CUDA][OpenMP] Create generic offload action
Summary:
This patch replaces the CUDA specific action by a generic offload action. The offload action may have multiple dependences classier in “host” and “device”. The way this generic offloading action is used is very similar to what is done today by the CUDA implementation: it is used to set a specific toolchain and architecture to its dependences during the generation of jobs.
This patch also proposes propagating the offloading information through the action graph so that that information can be easily retrieved at any time during the generation of commands. This allows e.g. the "clang tool” to evaluate whether CUDA should be supported for the device or host and ptas to easily retrieve the target architecture.
This is an example of how the action graphs would look like (compilation of a single CUDA file with two GPU architectures)
```
0: input, "cudatests.cu", cuda, (host-cuda)
1: preprocessor, {0}, cuda-cpp-output, (host-cuda)
2: compiler, {1}, ir, (host-cuda)
3: input, "cudatests.cu", cuda, (device-cuda, sm_35)
4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_35)
5: compiler, {4}, ir, (device-cuda, sm_35)
6: backend, {5}, assembler, (device-cuda, sm_35)
7: assembler, {6}, object, (device-cuda, sm_35)
8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {7}, object
9: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {6}, assembler
10: input, "cudatests.cu", cuda, (device-cuda, sm_37)
11: preprocessor, {10}, cuda-cpp-output, (device-cuda, sm_37)
12: compiler, {11}, ir, (device-cuda, sm_37)
13: backend, {12}, assembler, (device-cuda, sm_37)
14: assembler, {13}, object, (device-cuda, sm_37)
15: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {14}, object
16: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {13}, assembler
17: linker, {8, 9, 15, 16}, cuda-fatbin, (device-cuda)
18: offload, "host-cuda (powerpc64le-unknown-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda)" {17}, ir
19: backend, {18}, assembler
20: assembler, {19}, object
21: input, "cuda", object
22: input, "cudart", object
23: linker, {20, 21, 22}, image
```
The changes in this patch pass the existent regression tests (keeps the existent functionality) and resulting binaries execute correctly in a Power8+K40 machine.
Reviewers: echristo, hfinkel, jlebar, ABataev, tra
Subscribers: guansong, andreybokhanko, tcramer, mkuron, cfe-commits, arpith-jacob, carlo.bertolli, caomhin
Differential Revision: https://reviews.llvm.org/D18171
llvm-svn: 275645
2016-07-16 07:13:27 +08:00
|
|
|
}
|
2010-02-03 11:07:56 +08:00
|
|
|
|
[Driver][CUDA][OpenMP] Reimplement tool selection in the driver.
Summary:
This creates a tool selector in the driver that replaces the existing one. The goal is to better organize the code and make the selector easier to scale, in particular in the presence of offload actions that can be collapsed.
The current implementation became more confusing when the support for offloading actions was added. This concern was expressed by Eric in http://reviews.llvm.org/D9888.
This patch does not add new testing, it preserves the existing functionality.
Reviewers: echristo, tra, jlebar, rsmith, ABataev, hfinkel
Subscribers: whchung, guansong, mkuron, mehdi_amini, cfe-commits, Hahnfeld, andreybokhanko, caomhin, arpith-jacob, carlo.bertolli
Differential Revision: https://reviews.llvm.org/D21840
llvm-svn: 285307
2016-10-28 00:29:20 +08:00
|
|
|
public:
|
|
|
|
ToolSelector(const JobAction *BaseAction, const ToolChain &TC,
|
|
|
|
const Compilation &C, bool SaveTemps, bool EmbedBitcode)
|
|
|
|
: TC(TC), C(C), BaseAction(BaseAction), SaveTemps(SaveTemps),
|
|
|
|
EmbedBitcode(EmbedBitcode) {
|
|
|
|
assert(BaseAction && "Invalid base action.");
|
|
|
|
IsHostSelector = BaseAction->getOffloadingDeviceKind() == Action::OFK_None;
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Check if a chain of actions can be combined and return the tool that can
|
|
|
|
/// handle the combination of actions. The pointer to the current inputs \a
|
|
|
|
/// Inputs and the list of offload actions \a CollapsedOffloadActions
|
|
|
|
/// connected to collapsed actions are updated accordingly. The latter enables
|
|
|
|
/// the caller of the selector to process them afterwards instead of just
|
|
|
|
/// dropping them. If no suitable tool is found, null will be returned.
|
|
|
|
const Tool *getTool(const ActionList *&Inputs,
|
|
|
|
ActionList &CollapsedOffloadAction) {
|
|
|
|
//
|
|
|
|
// Get the largest chain of actions that we could combine.
|
|
|
|
//
|
|
|
|
|
|
|
|
SmallVector<JobActionInfo, 5> ActionChain(1);
|
|
|
|
ActionChain.back().JA = BaseAction;
|
|
|
|
while (ActionChain.back().JA) {
|
|
|
|
const Action *CurAction = ActionChain.back().JA;
|
|
|
|
|
|
|
|
// Grow the chain by one element.
|
|
|
|
ActionChain.resize(ActionChain.size() + 1);
|
|
|
|
JobActionInfo &AI = ActionChain.back();
|
|
|
|
|
|
|
|
// Attempt to fill it with the
|
|
|
|
AI.JA =
|
|
|
|
getPrevDependentAction(CurAction->getInputs(), AI.SavedOffloadAction);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Pop the last action info as it could not be filled.
|
|
|
|
ActionChain.pop_back();
|
|
|
|
|
|
|
|
//
|
|
|
|
// Attempt to combine actions. If all combining attempts failed, just return
|
|
|
|
// the tool of the provided action. At the end we attempt to combine the
|
|
|
|
// action with any preprocessor action it may depend on.
|
|
|
|
//
|
|
|
|
|
|
|
|
const Tool *T = combineAssembleBackendCompile(ActionChain, Inputs,
|
|
|
|
CollapsedOffloadAction);
|
|
|
|
if (!T)
|
|
|
|
T = combineAssembleBackend(ActionChain, Inputs, CollapsedOffloadAction);
|
|
|
|
if (!T)
|
|
|
|
T = combineBackendCompile(ActionChain, Inputs, CollapsedOffloadAction);
|
|
|
|
if (!T) {
|
|
|
|
Inputs = &BaseAction->getInputs();
|
|
|
|
T = TC.SelectTool(*BaseAction);
|
|
|
|
}
|
|
|
|
|
|
|
|
combineWithPreprocessor(T, Inputs, CollapsedOffloadAction);
|
|
|
|
return T;
|
|
|
|
}
|
|
|
|
};
|
2010-02-03 11:07:56 +08:00
|
|
|
}
|
|
|
|
|
2016-01-15 05:41:21 +08:00
|
|
|
InputInfo Driver::BuildJobsForAction(
|
2016-10-08 05:27:26 +08:00
|
|
|
Compilation &C, const Action *A, const ToolChain *TC, StringRef BoundArch,
|
2016-01-15 05:41:21 +08:00
|
|
|
bool AtTopLevel, bool MultipleArchs, const char *LinkingOutput,
|
[CUDA][OpenMP] Create generic offload action
Summary:
This patch replaces the CUDA specific action by a generic offload action. The offload action may have multiple dependences classier in “host” and “device”. The way this generic offloading action is used is very similar to what is done today by the CUDA implementation: it is used to set a specific toolchain and architecture to its dependences during the generation of jobs.
This patch also proposes propagating the offloading information through the action graph so that that information can be easily retrieved at any time during the generation of commands. This allows e.g. the "clang tool” to evaluate whether CUDA should be supported for the device or host and ptas to easily retrieve the target architecture.
This is an example of how the action graphs would look like (compilation of a single CUDA file with two GPU architectures)
```
0: input, "cudatests.cu", cuda, (host-cuda)
1: preprocessor, {0}, cuda-cpp-output, (host-cuda)
2: compiler, {1}, ir, (host-cuda)
3: input, "cudatests.cu", cuda, (device-cuda, sm_35)
4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_35)
5: compiler, {4}, ir, (device-cuda, sm_35)
6: backend, {5}, assembler, (device-cuda, sm_35)
7: assembler, {6}, object, (device-cuda, sm_35)
8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {7}, object
9: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {6}, assembler
10: input, "cudatests.cu", cuda, (device-cuda, sm_37)
11: preprocessor, {10}, cuda-cpp-output, (device-cuda, sm_37)
12: compiler, {11}, ir, (device-cuda, sm_37)
13: backend, {12}, assembler, (device-cuda, sm_37)
14: assembler, {13}, object, (device-cuda, sm_37)
15: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {14}, object
16: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {13}, assembler
17: linker, {8, 9, 15, 16}, cuda-fatbin, (device-cuda)
18: offload, "host-cuda (powerpc64le-unknown-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda)" {17}, ir
19: backend, {18}, assembler
20: assembler, {19}, object
21: input, "cuda", object
22: input, "cudart", object
23: linker, {20, 21, 22}, image
```
The changes in this patch pass the existent regression tests (keeps the existent functionality) and resulting binaries execute correctly in a Power8+K40 machine.
Reviewers: echristo, hfinkel, jlebar, ABataev, tra
Subscribers: guansong, andreybokhanko, tcramer, mkuron, cfe-commits, arpith-jacob, carlo.bertolli, caomhin
Differential Revision: https://reviews.llvm.org/D18171
llvm-svn: 275645
2016-07-16 07:13:27 +08:00
|
|
|
std::map<std::pair<const Action *, std::string>, InputInfo> &CachedResults,
|
|
|
|
bool BuildForOffloadDevice) const {
|
2016-01-16 11:30:08 +08:00
|
|
|
// The bound arch is not necessarily represented in the toolchain's triple --
|
|
|
|
// for example, armv7 and armv7s both map to the same triple -- so we need
|
|
|
|
// both in our map.
|
|
|
|
std::string TriplePlusArch = TC->getTriple().normalize();
|
2016-10-08 05:27:26 +08:00
|
|
|
if (!BoundArch.empty()) {
|
2016-01-16 11:30:08 +08:00
|
|
|
TriplePlusArch += "-";
|
|
|
|
TriplePlusArch += BoundArch;
|
|
|
|
}
|
|
|
|
std::pair<const Action *, std::string> ActionTC = {A, TriplePlusArch};
|
2016-01-15 05:41:21 +08:00
|
|
|
auto CachedResult = CachedResults.find(ActionTC);
|
|
|
|
if (CachedResult != CachedResults.end()) {
|
|
|
|
return CachedResult->second;
|
|
|
|
}
|
[CUDA][OpenMP] Create generic offload action
Summary:
This patch replaces the CUDA specific action by a generic offload action. The offload action may have multiple dependences classier in “host” and “device”. The way this generic offloading action is used is very similar to what is done today by the CUDA implementation: it is used to set a specific toolchain and architecture to its dependences during the generation of jobs.
This patch also proposes propagating the offloading information through the action graph so that that information can be easily retrieved at any time during the generation of commands. This allows e.g. the "clang tool” to evaluate whether CUDA should be supported for the device or host and ptas to easily retrieve the target architecture.
This is an example of how the action graphs would look like (compilation of a single CUDA file with two GPU architectures)
```
0: input, "cudatests.cu", cuda, (host-cuda)
1: preprocessor, {0}, cuda-cpp-output, (host-cuda)
2: compiler, {1}, ir, (host-cuda)
3: input, "cudatests.cu", cuda, (device-cuda, sm_35)
4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_35)
5: compiler, {4}, ir, (device-cuda, sm_35)
6: backend, {5}, assembler, (device-cuda, sm_35)
7: assembler, {6}, object, (device-cuda, sm_35)
8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {7}, object
9: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {6}, assembler
10: input, "cudatests.cu", cuda, (device-cuda, sm_37)
11: preprocessor, {10}, cuda-cpp-output, (device-cuda, sm_37)
12: compiler, {11}, ir, (device-cuda, sm_37)
13: backend, {12}, assembler, (device-cuda, sm_37)
14: assembler, {13}, object, (device-cuda, sm_37)
15: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {14}, object
16: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {13}, assembler
17: linker, {8, 9, 15, 16}, cuda-fatbin, (device-cuda)
18: offload, "host-cuda (powerpc64le-unknown-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda)" {17}, ir
19: backend, {18}, assembler
20: assembler, {19}, object
21: input, "cuda", object
22: input, "cudart", object
23: linker, {20, 21, 22}, image
```
The changes in this patch pass the existent regression tests (keeps the existent functionality) and resulting binaries execute correctly in a Power8+K40 machine.
Reviewers: echristo, hfinkel, jlebar, ABataev, tra
Subscribers: guansong, andreybokhanko, tcramer, mkuron, cfe-commits, arpith-jacob, carlo.bertolli, caomhin
Differential Revision: https://reviews.llvm.org/D18171
llvm-svn: 275645
2016-07-16 07:13:27 +08:00
|
|
|
InputInfo Result = BuildJobsForActionNoCache(
|
|
|
|
C, A, TC, BoundArch, AtTopLevel, MultipleArchs, LinkingOutput,
|
|
|
|
CachedResults, BuildForOffloadDevice);
|
2016-01-15 05:41:21 +08:00
|
|
|
CachedResults[ActionTC] = Result;
|
|
|
|
return Result;
|
|
|
|
}
|
|
|
|
|
|
|
|
InputInfo Driver::BuildJobsForActionNoCache(
|
2016-10-08 05:27:26 +08:00
|
|
|
Compilation &C, const Action *A, const ToolChain *TC, StringRef BoundArch,
|
2016-01-15 05:41:21 +08:00
|
|
|
bool AtTopLevel, bool MultipleArchs, const char *LinkingOutput,
|
[CUDA][OpenMP] Create generic offload action
Summary:
This patch replaces the CUDA specific action by a generic offload action. The offload action may have multiple dependences classier in “host” and “device”. The way this generic offloading action is used is very similar to what is done today by the CUDA implementation: it is used to set a specific toolchain and architecture to its dependences during the generation of jobs.
This patch also proposes propagating the offloading information through the action graph so that that information can be easily retrieved at any time during the generation of commands. This allows e.g. the "clang tool” to evaluate whether CUDA should be supported for the device or host and ptas to easily retrieve the target architecture.
This is an example of how the action graphs would look like (compilation of a single CUDA file with two GPU architectures)
```
0: input, "cudatests.cu", cuda, (host-cuda)
1: preprocessor, {0}, cuda-cpp-output, (host-cuda)
2: compiler, {1}, ir, (host-cuda)
3: input, "cudatests.cu", cuda, (device-cuda, sm_35)
4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_35)
5: compiler, {4}, ir, (device-cuda, sm_35)
6: backend, {5}, assembler, (device-cuda, sm_35)
7: assembler, {6}, object, (device-cuda, sm_35)
8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {7}, object
9: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {6}, assembler
10: input, "cudatests.cu", cuda, (device-cuda, sm_37)
11: preprocessor, {10}, cuda-cpp-output, (device-cuda, sm_37)
12: compiler, {11}, ir, (device-cuda, sm_37)
13: backend, {12}, assembler, (device-cuda, sm_37)
14: assembler, {13}, object, (device-cuda, sm_37)
15: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {14}, object
16: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {13}, assembler
17: linker, {8, 9, 15, 16}, cuda-fatbin, (device-cuda)
18: offload, "host-cuda (powerpc64le-unknown-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda)" {17}, ir
19: backend, {18}, assembler
20: assembler, {19}, object
21: input, "cuda", object
22: input, "cudart", object
23: linker, {20, 21, 22}, image
```
The changes in this patch pass the existent regression tests (keeps the existent functionality) and resulting binaries execute correctly in a Power8+K40 machine.
Reviewers: echristo, hfinkel, jlebar, ABataev, tra
Subscribers: guansong, andreybokhanko, tcramer, mkuron, cfe-commits, arpith-jacob, carlo.bertolli, caomhin
Differential Revision: https://reviews.llvm.org/D18171
llvm-svn: 275645
2016-07-16 07:13:27 +08:00
|
|
|
std::map<std::pair<const Action *, std::string>, InputInfo> &CachedResults,
|
|
|
|
bool BuildForOffloadDevice) const {
|
2009-09-09 07:36:43 +08:00
|
|
|
llvm::PrettyStackTraceString CrashInfo("Building compilation jobs");
|
2009-03-19 07:18:19 +08:00
|
|
|
|
[CUDA][OpenMP] Create generic offload action
Summary:
This patch replaces the CUDA specific action by a generic offload action. The offload action may have multiple dependences classier in “host” and “device”. The way this generic offloading action is used is very similar to what is done today by the CUDA implementation: it is used to set a specific toolchain and architecture to its dependences during the generation of jobs.
This patch also proposes propagating the offloading information through the action graph so that that information can be easily retrieved at any time during the generation of commands. This allows e.g. the "clang tool” to evaluate whether CUDA should be supported for the device or host and ptas to easily retrieve the target architecture.
This is an example of how the action graphs would look like (compilation of a single CUDA file with two GPU architectures)
```
0: input, "cudatests.cu", cuda, (host-cuda)
1: preprocessor, {0}, cuda-cpp-output, (host-cuda)
2: compiler, {1}, ir, (host-cuda)
3: input, "cudatests.cu", cuda, (device-cuda, sm_35)
4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_35)
5: compiler, {4}, ir, (device-cuda, sm_35)
6: backend, {5}, assembler, (device-cuda, sm_35)
7: assembler, {6}, object, (device-cuda, sm_35)
8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {7}, object
9: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {6}, assembler
10: input, "cudatests.cu", cuda, (device-cuda, sm_37)
11: preprocessor, {10}, cuda-cpp-output, (device-cuda, sm_37)
12: compiler, {11}, ir, (device-cuda, sm_37)
13: backend, {12}, assembler, (device-cuda, sm_37)
14: assembler, {13}, object, (device-cuda, sm_37)
15: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {14}, object
16: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {13}, assembler
17: linker, {8, 9, 15, 16}, cuda-fatbin, (device-cuda)
18: offload, "host-cuda (powerpc64le-unknown-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda)" {17}, ir
19: backend, {18}, assembler
20: assembler, {19}, object
21: input, "cuda", object
22: input, "cudart", object
23: linker, {20, 21, 22}, image
```
The changes in this patch pass the existent regression tests (keeps the existent functionality) and resulting binaries execute correctly in a Power8+K40 machine.
Reviewers: echristo, hfinkel, jlebar, ABataev, tra
Subscribers: guansong, andreybokhanko, tcramer, mkuron, cfe-commits, arpith-jacob, carlo.bertolli, caomhin
Differential Revision: https://reviews.llvm.org/D18171
llvm-svn: 275645
2016-07-16 07:13:27 +08:00
|
|
|
InputInfoList OffloadDependencesInputInfo;
|
|
|
|
if (const OffloadAction *OA = dyn_cast<OffloadAction>(A)) {
|
|
|
|
// The offload action is expected to be used in four different situations.
|
|
|
|
//
|
|
|
|
// a) Set a toolchain/architecture/kind for a host action:
|
|
|
|
// Host Action 1 -> OffloadAction -> Host Action 2
|
|
|
|
//
|
|
|
|
// b) Set a toolchain/architecture/kind for a device action;
|
|
|
|
// Device Action 1 -> OffloadAction -> Device Action 2
|
|
|
|
//
|
[Driver][CUDA][OpenMP] Reimplement tool selection in the driver.
Summary:
This creates a tool selector in the driver that replaces the existing one. The goal is to better organize the code and make the selector easier to scale, in particular in the presence of offload actions that can be collapsed.
The current implementation became more confusing when the support for offloading actions was added. This concern was expressed by Eric in http://reviews.llvm.org/D9888.
This patch does not add new testing, it preserves the existing functionality.
Reviewers: echristo, tra, jlebar, rsmith, ABataev, hfinkel
Subscribers: whchung, guansong, mkuron, mehdi_amini, cfe-commits, Hahnfeld, andreybokhanko, caomhin, arpith-jacob, carlo.bertolli
Differential Revision: https://reviews.llvm.org/D21840
llvm-svn: 285307
2016-10-28 00:29:20 +08:00
|
|
|
// c) Specify a device dependence to a host action;
|
[CUDA][OpenMP] Create generic offload action
Summary:
This patch replaces the CUDA specific action by a generic offload action. The offload action may have multiple dependences classier in “host” and “device”. The way this generic offloading action is used is very similar to what is done today by the CUDA implementation: it is used to set a specific toolchain and architecture to its dependences during the generation of jobs.
This patch also proposes propagating the offloading information through the action graph so that that information can be easily retrieved at any time during the generation of commands. This allows e.g. the "clang tool” to evaluate whether CUDA should be supported for the device or host and ptas to easily retrieve the target architecture.
This is an example of how the action graphs would look like (compilation of a single CUDA file with two GPU architectures)
```
0: input, "cudatests.cu", cuda, (host-cuda)
1: preprocessor, {0}, cuda-cpp-output, (host-cuda)
2: compiler, {1}, ir, (host-cuda)
3: input, "cudatests.cu", cuda, (device-cuda, sm_35)
4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_35)
5: compiler, {4}, ir, (device-cuda, sm_35)
6: backend, {5}, assembler, (device-cuda, sm_35)
7: assembler, {6}, object, (device-cuda, sm_35)
8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {7}, object
9: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {6}, assembler
10: input, "cudatests.cu", cuda, (device-cuda, sm_37)
11: preprocessor, {10}, cuda-cpp-output, (device-cuda, sm_37)
12: compiler, {11}, ir, (device-cuda, sm_37)
13: backend, {12}, assembler, (device-cuda, sm_37)
14: assembler, {13}, object, (device-cuda, sm_37)
15: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {14}, object
16: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {13}, assembler
17: linker, {8, 9, 15, 16}, cuda-fatbin, (device-cuda)
18: offload, "host-cuda (powerpc64le-unknown-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda)" {17}, ir
19: backend, {18}, assembler
20: assembler, {19}, object
21: input, "cuda", object
22: input, "cudart", object
23: linker, {20, 21, 22}, image
```
The changes in this patch pass the existent regression tests (keeps the existent functionality) and resulting binaries execute correctly in a Power8+K40 machine.
Reviewers: echristo, hfinkel, jlebar, ABataev, tra
Subscribers: guansong, andreybokhanko, tcramer, mkuron, cfe-commits, arpith-jacob, carlo.bertolli, caomhin
Differential Revision: https://reviews.llvm.org/D18171
llvm-svn: 275645
2016-07-16 07:13:27 +08:00
|
|
|
// Device Action 1 _
|
|
|
|
// \
|
|
|
|
// Host Action 1 ---> OffloadAction -> Host Action 2
|
|
|
|
//
|
|
|
|
// d) Specify a host dependence to a device action.
|
|
|
|
// Host Action 1 _
|
|
|
|
// \
|
|
|
|
// Device Action 1 ---> OffloadAction -> Device Action 2
|
|
|
|
//
|
|
|
|
// For a) and b), we just return the job generated for the dependence. For
|
|
|
|
// c) and d) we override the current action with the host/device dependence
|
|
|
|
// if the current toolchain is host/device and set the offload dependences
|
|
|
|
// info with the jobs obtained from the device/host dependence(s).
|
|
|
|
|
|
|
|
// If there is a single device option, just generate the job for it.
|
|
|
|
if (OA->hasSingleDeviceDependence()) {
|
|
|
|
InputInfo DevA;
|
|
|
|
OA->doOnEachDeviceDependence([&](Action *DepA, const ToolChain *DepTC,
|
|
|
|
const char *DepBoundArch) {
|
|
|
|
DevA =
|
|
|
|
BuildJobsForAction(C, DepA, DepTC, DepBoundArch, AtTopLevel,
|
|
|
|
/*MultipleArchs*/ !!DepBoundArch, LinkingOutput,
|
|
|
|
CachedResults, /*BuildForOffloadDevice=*/true);
|
|
|
|
});
|
|
|
|
return DevA;
|
2015-07-14 07:27:56 +08:00
|
|
|
}
|
[CUDA][OpenMP] Create generic offload action
Summary:
This patch replaces the CUDA specific action by a generic offload action. The offload action may have multiple dependences classier in “host” and “device”. The way this generic offloading action is used is very similar to what is done today by the CUDA implementation: it is used to set a specific toolchain and architecture to its dependences during the generation of jobs.
This patch also proposes propagating the offloading information through the action graph so that that information can be easily retrieved at any time during the generation of commands. This allows e.g. the "clang tool” to evaluate whether CUDA should be supported for the device or host and ptas to easily retrieve the target architecture.
This is an example of how the action graphs would look like (compilation of a single CUDA file with two GPU architectures)
```
0: input, "cudatests.cu", cuda, (host-cuda)
1: preprocessor, {0}, cuda-cpp-output, (host-cuda)
2: compiler, {1}, ir, (host-cuda)
3: input, "cudatests.cu", cuda, (device-cuda, sm_35)
4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_35)
5: compiler, {4}, ir, (device-cuda, sm_35)
6: backend, {5}, assembler, (device-cuda, sm_35)
7: assembler, {6}, object, (device-cuda, sm_35)
8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {7}, object
9: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {6}, assembler
10: input, "cudatests.cu", cuda, (device-cuda, sm_37)
11: preprocessor, {10}, cuda-cpp-output, (device-cuda, sm_37)
12: compiler, {11}, ir, (device-cuda, sm_37)
13: backend, {12}, assembler, (device-cuda, sm_37)
14: assembler, {13}, object, (device-cuda, sm_37)
15: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {14}, object
16: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {13}, assembler
17: linker, {8, 9, 15, 16}, cuda-fatbin, (device-cuda)
18: offload, "host-cuda (powerpc64le-unknown-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda)" {17}, ir
19: backend, {18}, assembler
20: assembler, {19}, object
21: input, "cuda", object
22: input, "cudart", object
23: linker, {20, 21, 22}, image
```
The changes in this patch pass the existent regression tests (keeps the existent functionality) and resulting binaries execute correctly in a Power8+K40 machine.
Reviewers: echristo, hfinkel, jlebar, ABataev, tra
Subscribers: guansong, andreybokhanko, tcramer, mkuron, cfe-commits, arpith-jacob, carlo.bertolli, caomhin
Differential Revision: https://reviews.llvm.org/D18171
llvm-svn: 275645
2016-07-16 07:13:27 +08:00
|
|
|
|
|
|
|
// If 'Action 2' is host, we generate jobs for the device dependences and
|
|
|
|
// override the current action with the host dependence. Otherwise, we
|
|
|
|
// generate the host dependences and override the action with the device
|
|
|
|
// dependence. The dependences can't therefore be a top-level action.
|
|
|
|
OA->doOnEachDependence(
|
|
|
|
/*IsHostDependence=*/BuildForOffloadDevice,
|
|
|
|
[&](Action *DepA, const ToolChain *DepTC, const char *DepBoundArch) {
|
|
|
|
OffloadDependencesInputInfo.push_back(BuildJobsForAction(
|
|
|
|
C, DepA, DepTC, DepBoundArch, /*AtTopLevel=*/false,
|
|
|
|
/*MultipleArchs*/ !!DepBoundArch, LinkingOutput, CachedResults,
|
|
|
|
/*BuildForOffloadDevice=*/DepA->getOffloadingDeviceKind() !=
|
|
|
|
Action::OFK_None));
|
|
|
|
});
|
|
|
|
|
|
|
|
A = BuildForOffloadDevice
|
|
|
|
? OA->getSingleDeviceDependence(/*DoNotConsiderHostActions=*/true)
|
|
|
|
: OA->getHostDependence();
|
2015-07-14 07:27:56 +08:00
|
|
|
}
|
|
|
|
|
2009-03-16 14:56:51 +08:00
|
|
|
if (const InputAction *IA = dyn_cast<InputAction>(A)) {
|
2009-09-09 07:36:43 +08:00
|
|
|
// FIXME: It would be nice to not claim this here; maybe the old scheme of
|
|
|
|
// just using Args was better?
|
2009-03-19 15:29:38 +08:00
|
|
|
const Arg &Input = IA->getInputArg();
|
|
|
|
Input.claim();
|
2010-06-10 06:31:08 +08:00
|
|
|
if (Input.getOption().matches(options::OPT_INPUT)) {
|
2012-11-01 12:30:05 +08:00
|
|
|
const char *Name = Input.getValue();
|
2016-01-12 07:15:21 +08:00
|
|
|
return InputInfo(A, Name, /* BaseInput = */ Name);
|
2015-06-17 02:01:24 +08:00
|
|
|
}
|
2016-01-12 07:15:21 +08:00
|
|
|
return InputInfo(A, &Input, /* BaseInput = */ "");
|
2009-03-16 14:56:51 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
if (const BindArchAction *BAA = dyn_cast<BindArchAction>(A)) {
|
2012-04-28 00:50:38 +08:00
|
|
|
const ToolChain *TC;
|
2016-10-08 05:27:26 +08:00
|
|
|
StringRef ArchName = BAA->getArchName();
|
2009-09-09 07:37:19 +08:00
|
|
|
|
2016-10-08 05:27:26 +08:00
|
|
|
if (!ArchName.empty())
|
2016-04-21 18:16:48 +08:00
|
|
|
TC = &getToolChain(C.getArgs(),
|
|
|
|
computeTargetTriple(*this, DefaultTargetTriple,
|
|
|
|
C.getArgs(), ArchName));
|
2012-04-28 00:50:38 +08:00
|
|
|
else
|
|
|
|
TC = &C.getDefaultToolChain();
|
2009-09-09 07:37:19 +08:00
|
|
|
|
2016-02-24 03:30:43 +08:00
|
|
|
return BuildJobsForAction(C, *BAA->input_begin(), TC, ArchName, AtTopLevel,
|
[CUDA][OpenMP] Create generic offload action
Summary:
This patch replaces the CUDA specific action by a generic offload action. The offload action may have multiple dependences classier in “host” and “device”. The way this generic offloading action is used is very similar to what is done today by the CUDA implementation: it is used to set a specific toolchain and architecture to its dependences during the generation of jobs.
This patch also proposes propagating the offloading information through the action graph so that that information can be easily retrieved at any time during the generation of commands. This allows e.g. the "clang tool” to evaluate whether CUDA should be supported for the device or host and ptas to easily retrieve the target architecture.
This is an example of how the action graphs would look like (compilation of a single CUDA file with two GPU architectures)
```
0: input, "cudatests.cu", cuda, (host-cuda)
1: preprocessor, {0}, cuda-cpp-output, (host-cuda)
2: compiler, {1}, ir, (host-cuda)
3: input, "cudatests.cu", cuda, (device-cuda, sm_35)
4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_35)
5: compiler, {4}, ir, (device-cuda, sm_35)
6: backend, {5}, assembler, (device-cuda, sm_35)
7: assembler, {6}, object, (device-cuda, sm_35)
8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {7}, object
9: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {6}, assembler
10: input, "cudatests.cu", cuda, (device-cuda, sm_37)
11: preprocessor, {10}, cuda-cpp-output, (device-cuda, sm_37)
12: compiler, {11}, ir, (device-cuda, sm_37)
13: backend, {12}, assembler, (device-cuda, sm_37)
14: assembler, {13}, object, (device-cuda, sm_37)
15: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {14}, object
16: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {13}, assembler
17: linker, {8, 9, 15, 16}, cuda-fatbin, (device-cuda)
18: offload, "host-cuda (powerpc64le-unknown-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda)" {17}, ir
19: backend, {18}, assembler
20: assembler, {19}, object
21: input, "cuda", object
22: input, "cudart", object
23: linker, {20, 21, 22}, image
```
The changes in this patch pass the existent regression tests (keeps the existent functionality) and resulting binaries execute correctly in a Power8+K40 machine.
Reviewers: echristo, hfinkel, jlebar, ABataev, tra
Subscribers: guansong, andreybokhanko, tcramer, mkuron, cfe-commits, arpith-jacob, carlo.bertolli, caomhin
Differential Revision: https://reviews.llvm.org/D18171
llvm-svn: 275645
2016-07-16 07:13:27 +08:00
|
|
|
MultipleArchs, LinkingOutput, CachedResults,
|
|
|
|
BuildForOffloadDevice);
|
2009-03-16 14:56:51 +08:00
|
|
|
}
|
|
|
|
|
2015-07-14 07:27:56 +08:00
|
|
|
|
2009-03-16 14:56:51 +08:00
|
|
|
const ActionList *Inputs = &A->getInputs();
|
2010-02-03 11:07:56 +08:00
|
|
|
|
|
|
|
const JobAction *JA = cast<JobAction>(A);
|
[CUDA][OpenMP] Create generic offload action
Summary:
This patch replaces the CUDA specific action by a generic offload action. The offload action may have multiple dependences classier in “host” and “device”. The way this generic offloading action is used is very similar to what is done today by the CUDA implementation: it is used to set a specific toolchain and architecture to its dependences during the generation of jobs.
This patch also proposes propagating the offloading information through the action graph so that that information can be easily retrieved at any time during the generation of commands. This allows e.g. the "clang tool” to evaluate whether CUDA should be supported for the device or host and ptas to easily retrieve the target architecture.
This is an example of how the action graphs would look like (compilation of a single CUDA file with two GPU architectures)
```
0: input, "cudatests.cu", cuda, (host-cuda)
1: preprocessor, {0}, cuda-cpp-output, (host-cuda)
2: compiler, {1}, ir, (host-cuda)
3: input, "cudatests.cu", cuda, (device-cuda, sm_35)
4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_35)
5: compiler, {4}, ir, (device-cuda, sm_35)
6: backend, {5}, assembler, (device-cuda, sm_35)
7: assembler, {6}, object, (device-cuda, sm_35)
8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {7}, object
9: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {6}, assembler
10: input, "cudatests.cu", cuda, (device-cuda, sm_37)
11: preprocessor, {10}, cuda-cpp-output, (device-cuda, sm_37)
12: compiler, {11}, ir, (device-cuda, sm_37)
13: backend, {12}, assembler, (device-cuda, sm_37)
14: assembler, {13}, object, (device-cuda, sm_37)
15: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {14}, object
16: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {13}, assembler
17: linker, {8, 9, 15, 16}, cuda-fatbin, (device-cuda)
18: offload, "host-cuda (powerpc64le-unknown-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda)" {17}, ir
19: backend, {18}, assembler
20: assembler, {19}, object
21: input, "cuda", object
22: input, "cudart", object
23: linker, {20, 21, 22}, image
```
The changes in this patch pass the existent regression tests (keeps the existent functionality) and resulting binaries execute correctly in a Power8+K40 machine.
Reviewers: echristo, hfinkel, jlebar, ABataev, tra
Subscribers: guansong, andreybokhanko, tcramer, mkuron, cfe-commits, arpith-jacob, carlo.bertolli, caomhin
Differential Revision: https://reviews.llvm.org/D18171
llvm-svn: 275645
2016-07-16 07:13:27 +08:00
|
|
|
ActionList CollapsedOffloadActions;
|
|
|
|
|
[Driver][CUDA][OpenMP] Reimplement tool selection in the driver.
Summary:
This creates a tool selector in the driver that replaces the existing one. The goal is to better organize the code and make the selector easier to scale, in particular in the presence of offload actions that can be collapsed.
The current implementation became more confusing when the support for offloading actions was added. This concern was expressed by Eric in http://reviews.llvm.org/D9888.
This patch does not add new testing, it preserves the existing functionality.
Reviewers: echristo, tra, jlebar, rsmith, ABataev, hfinkel
Subscribers: whchung, guansong, mkuron, mehdi_amini, cfe-commits, Hahnfeld, andreybokhanko, caomhin, arpith-jacob, carlo.bertolli
Differential Revision: https://reviews.llvm.org/D21840
llvm-svn: 285307
2016-10-28 00:29:20 +08:00
|
|
|
ToolSelector TS(JA, *TC, C, isSaveTempsEnabled(), embedBitcodeEnabled());
|
|
|
|
const Tool *T = TS.getTool(Inputs, CollapsedOffloadActions);
|
|
|
|
|
2013-03-24 23:06:53 +08:00
|
|
|
if (!T)
|
2016-01-12 07:09:32 +08:00
|
|
|
return InputInfo();
|
2009-03-16 14:56:51 +08:00
|
|
|
|
[CUDA][OpenMP] Create generic offload action
Summary:
This patch replaces the CUDA specific action by a generic offload action. The offload action may have multiple dependences classier in “host” and “device”. The way this generic offloading action is used is very similar to what is done today by the CUDA implementation: it is used to set a specific toolchain and architecture to its dependences during the generation of jobs.
This patch also proposes propagating the offloading information through the action graph so that that information can be easily retrieved at any time during the generation of commands. This allows e.g. the "clang tool” to evaluate whether CUDA should be supported for the device or host and ptas to easily retrieve the target architecture.
This is an example of how the action graphs would look like (compilation of a single CUDA file with two GPU architectures)
```
0: input, "cudatests.cu", cuda, (host-cuda)
1: preprocessor, {0}, cuda-cpp-output, (host-cuda)
2: compiler, {1}, ir, (host-cuda)
3: input, "cudatests.cu", cuda, (device-cuda, sm_35)
4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_35)
5: compiler, {4}, ir, (device-cuda, sm_35)
6: backend, {5}, assembler, (device-cuda, sm_35)
7: assembler, {6}, object, (device-cuda, sm_35)
8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {7}, object
9: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {6}, assembler
10: input, "cudatests.cu", cuda, (device-cuda, sm_37)
11: preprocessor, {10}, cuda-cpp-output, (device-cuda, sm_37)
12: compiler, {11}, ir, (device-cuda, sm_37)
13: backend, {12}, assembler, (device-cuda, sm_37)
14: assembler, {13}, object, (device-cuda, sm_37)
15: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {14}, object
16: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {13}, assembler
17: linker, {8, 9, 15, 16}, cuda-fatbin, (device-cuda)
18: offload, "host-cuda (powerpc64le-unknown-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda)" {17}, ir
19: backend, {18}, assembler
20: assembler, {19}, object
21: input, "cuda", object
22: input, "cudart", object
23: linker, {20, 21, 22}, image
```
The changes in this patch pass the existent regression tests (keeps the existent functionality) and resulting binaries execute correctly in a Power8+K40 machine.
Reviewers: echristo, hfinkel, jlebar, ABataev, tra
Subscribers: guansong, andreybokhanko, tcramer, mkuron, cfe-commits, arpith-jacob, carlo.bertolli, caomhin
Differential Revision: https://reviews.llvm.org/D18171
llvm-svn: 275645
2016-07-16 07:13:27 +08:00
|
|
|
// If we've collapsed action list that contained OffloadAction we
|
|
|
|
// need to build jobs for host/device-side inputs it may have held.
|
|
|
|
for (const auto *OA : CollapsedOffloadActions)
|
|
|
|
cast<OffloadAction>(OA)->doOnEachDependence(
|
|
|
|
/*IsHostDependence=*/BuildForOffloadDevice,
|
|
|
|
[&](Action *DepA, const ToolChain *DepTC, const char *DepBoundArch) {
|
|
|
|
OffloadDependencesInputInfo.push_back(BuildJobsForAction(
|
2016-08-23 02:50:34 +08:00
|
|
|
C, DepA, DepTC, DepBoundArch, /* AtTopLevel */ false,
|
[CUDA][OpenMP] Create generic offload action
Summary:
This patch replaces the CUDA-specific action with a generic offload action. The offload action may have multiple dependences, classified as “host” and “device”. The way this generic offloading action is used is very similar to what is done today by the CUDA implementation: it is used to set a specific toolchain and architecture for its dependences during the generation of jobs.
This patch also proposes propagating the offloading information through the action graph so that that information can be easily retrieved at any time during the generation of commands. This allows, e.g., the “clang tool” to evaluate whether CUDA should be supported for the device or host, and ptxas to easily retrieve the target architecture.
This is an example of what the action graphs would look like (compilation of a single CUDA file with two GPU architectures):
```
0: input, "cudatests.cu", cuda, (host-cuda)
1: preprocessor, {0}, cuda-cpp-output, (host-cuda)
2: compiler, {1}, ir, (host-cuda)
3: input, "cudatests.cu", cuda, (device-cuda, sm_35)
4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_35)
5: compiler, {4}, ir, (device-cuda, sm_35)
6: backend, {5}, assembler, (device-cuda, sm_35)
7: assembler, {6}, object, (device-cuda, sm_35)
8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {7}, object
9: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {6}, assembler
10: input, "cudatests.cu", cuda, (device-cuda, sm_37)
11: preprocessor, {10}, cuda-cpp-output, (device-cuda, sm_37)
12: compiler, {11}, ir, (device-cuda, sm_37)
13: backend, {12}, assembler, (device-cuda, sm_37)
14: assembler, {13}, object, (device-cuda, sm_37)
15: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {14}, object
16: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {13}, assembler
17: linker, {8, 9, 15, 16}, cuda-fatbin, (device-cuda)
18: offload, "host-cuda (powerpc64le-unknown-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda)" {17}, ir
19: backend, {18}, assembler
20: assembler, {19}, object
21: input, "cuda", object
22: input, "cudart", object
23: linker, {20, 21, 22}, image
```
The changes in this patch pass the existent regression tests (keeps the existent functionality) and resulting binaries execute correctly in a Power8+K40 machine.
Reviewers: echristo, hfinkel, jlebar, ABataev, tra
Subscribers: guansong, andreybokhanko, tcramer, mkuron, cfe-commits, arpith-jacob, carlo.bertolli, caomhin
Differential Revision: https://reviews.llvm.org/D18171
llvm-svn: 275645
2016-07-16 07:13:27 +08:00
|
|
|
/*MultipleArchs=*/!!DepBoundArch, LinkingOutput, CachedResults,
|
|
|
|
/*BuildForOffloadDevice=*/DepA->getOffloadingDeviceKind() !=
|
|
|
|
Action::OFK_None));
|
|
|
|
});
|
2015-08-28 02:10:41 +08:00
|
|
|
|
2009-03-16 14:56:51 +08:00
|
|
|
// Only use pipes when there is exactly one input.
|
2009-03-18 14:00:36 +08:00
|
|
|
InputInfoList InputInfos;
|
2014-12-30 05:02:47 +08:00
|
|
|
for (const Action *Input : *Inputs) {
|
2013-02-18 08:38:25 +08:00
|
|
|
// Treat dsymutil and verify sub-jobs as being at the top-level too, they
|
|
|
|
// shouldn't get temporary output names.
|
2010-06-05 02:28:41 +08:00
|
|
|
// FIXME: Clean this up.
|
2016-01-12 07:09:32 +08:00
|
|
|
bool SubJobAtTopLevel =
|
|
|
|
AtTopLevel && (isa<DsymutilJobAction>(A) || isa<VerifyJobAction>(A));
|
[CUDA][OpenMP] Create generic offload action
Summary:
This patch replaces the CUDA-specific action with a generic offload action. The offload action may have multiple dependences, classified as “host” and “device”. The way this generic offloading action is used is very similar to what is done today by the CUDA implementation: it is used to set a specific toolchain and architecture for its dependences during the generation of jobs.
This patch also proposes propagating the offloading information through the action graph so that that information can be easily retrieved at any time during the generation of commands. This allows, e.g., the “clang tool” to evaluate whether CUDA should be supported for the device or host, and ptxas to easily retrieve the target architecture.
This is an example of what the action graphs would look like (compilation of a single CUDA file with two GPU architectures):
```
0: input, "cudatests.cu", cuda, (host-cuda)
1: preprocessor, {0}, cuda-cpp-output, (host-cuda)
2: compiler, {1}, ir, (host-cuda)
3: input, "cudatests.cu", cuda, (device-cuda, sm_35)
4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_35)
5: compiler, {4}, ir, (device-cuda, sm_35)
6: backend, {5}, assembler, (device-cuda, sm_35)
7: assembler, {6}, object, (device-cuda, sm_35)
8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {7}, object
9: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {6}, assembler
10: input, "cudatests.cu", cuda, (device-cuda, sm_37)
11: preprocessor, {10}, cuda-cpp-output, (device-cuda, sm_37)
12: compiler, {11}, ir, (device-cuda, sm_37)
13: backend, {12}, assembler, (device-cuda, sm_37)
14: assembler, {13}, object, (device-cuda, sm_37)
15: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {14}, object
16: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {13}, assembler
17: linker, {8, 9, 15, 16}, cuda-fatbin, (device-cuda)
18: offload, "host-cuda (powerpc64le-unknown-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda)" {17}, ir
19: backend, {18}, assembler
20: assembler, {19}, object
21: input, "cuda", object
22: input, "cudart", object
23: linker, {20, 21, 22}, image
```
The changes in this patch pass the existent regression tests (keeps the existent functionality) and resulting binaries execute correctly in a Power8+K40 machine.
Reviewers: echristo, hfinkel, jlebar, ABataev, tra
Subscribers: guansong, andreybokhanko, tcramer, mkuron, cfe-commits, arpith-jacob, carlo.bertolli, caomhin
Differential Revision: https://reviews.llvm.org/D18171
llvm-svn: 275645
2016-07-16 07:13:27 +08:00
|
|
|
InputInfos.push_back(BuildJobsForAction(
|
|
|
|
C, Input, TC, BoundArch, SubJobAtTopLevel, MultipleArchs, LinkingOutput,
|
|
|
|
CachedResults, BuildForOffloadDevice));
|
2009-03-16 14:56:51 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
// Always use the first input as the base input.
|
|
|
|
const char *BaseInput = InputInfos[0].getBaseInput();
|
2009-03-18 01:53:55 +08:00
|
|
|
|
2010-06-05 02:28:41 +08:00
|
|
|
// ... except dsymutil actions, which use their actual input as the base
|
|
|
|
// input.
|
|
|
|
if (JA->getType() == types::TY_dSYM)
|
|
|
|
BaseInput = InputInfos[0].getFilename();
|
|
|
|
|
[CUDA][OpenMP] Create generic offload action
Summary:
This patch replaces the CUDA-specific action with a generic offload action. The offload action may have multiple dependences, classified as “host” and “device”. The way this generic offloading action is used is very similar to what is done today by the CUDA implementation: it is used to set a specific toolchain and architecture for its dependences during the generation of jobs.
This patch also proposes propagating the offloading information through the action graph so that that information can be easily retrieved at any time during the generation of commands. This allows, e.g., the “clang tool” to evaluate whether CUDA should be supported for the device or host, and ptxas to easily retrieve the target architecture.
This is an example of what the action graphs would look like (compilation of a single CUDA file with two GPU architectures):
```
0: input, "cudatests.cu", cuda, (host-cuda)
1: preprocessor, {0}, cuda-cpp-output, (host-cuda)
2: compiler, {1}, ir, (host-cuda)
3: input, "cudatests.cu", cuda, (device-cuda, sm_35)
4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_35)
5: compiler, {4}, ir, (device-cuda, sm_35)
6: backend, {5}, assembler, (device-cuda, sm_35)
7: assembler, {6}, object, (device-cuda, sm_35)
8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {7}, object
9: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {6}, assembler
10: input, "cudatests.cu", cuda, (device-cuda, sm_37)
11: preprocessor, {10}, cuda-cpp-output, (device-cuda, sm_37)
12: compiler, {11}, ir, (device-cuda, sm_37)
13: backend, {12}, assembler, (device-cuda, sm_37)
14: assembler, {13}, object, (device-cuda, sm_37)
15: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {14}, object
16: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {13}, assembler
17: linker, {8, 9, 15, 16}, cuda-fatbin, (device-cuda)
18: offload, "host-cuda (powerpc64le-unknown-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda)" {17}, ir
19: backend, {18}, assembler
20: assembler, {19}, object
21: input, "cuda", object
22: input, "cudart", object
23: linker, {20, 21, 22}, image
```
The changes in this patch pass the existent regression tests (keeps the existent functionality) and resulting binaries execute correctly in a Power8+K40 machine.
Reviewers: echristo, hfinkel, jlebar, ABataev, tra
Subscribers: guansong, andreybokhanko, tcramer, mkuron, cfe-commits, arpith-jacob, carlo.bertolli, caomhin
Differential Revision: https://reviews.llvm.org/D18171
llvm-svn: 275645
2016-07-16 07:13:27 +08:00
|
|
|
// Append outputs of offload device jobs to the input list
|
|
|
|
if (!OffloadDependencesInputInfo.empty())
|
|
|
|
InputInfos.append(OffloadDependencesInputInfo.begin(),
|
|
|
|
OffloadDependencesInputInfo.end());
|
2015-07-14 07:27:56 +08:00
|
|
|
|
2016-07-28 07:02:20 +08:00
|
|
|
// Set the effective triple of the toolchain for the duration of this job.
|
|
|
|
llvm::Triple EffectiveTriple;
|
|
|
|
const ToolChain &ToolTC = T->getToolChain();
|
|
|
|
const ArgList &Args = C.getArgsForToolChain(TC, BoundArch);
|
|
|
|
if (InputInfos.size() != 1) {
|
|
|
|
EffectiveTriple = llvm::Triple(ToolTC.ComputeEffectiveClangTriple(Args));
|
|
|
|
} else {
|
|
|
|
// Pass along the input type if it can be unambiguously determined.
|
|
|
|
EffectiveTriple = llvm::Triple(
|
|
|
|
ToolTC.ComputeEffectiveClangTriple(Args, InputInfos[0].getType()));
|
|
|
|
}
|
|
|
|
RegisterEffectiveTriple TripleRAII(ToolTC, EffectiveTriple);
|
|
|
|
|
2010-08-02 10:38:15 +08:00
|
|
|
// Determine the place to write output to, if any.
|
2016-01-12 07:09:32 +08:00
|
|
|
InputInfo Result;
|
2013-02-05 15:29:57 +08:00
|
|
|
if (JA->getType() == types::TY_Nothing)
|
2016-01-12 07:15:21 +08:00
|
|
|
Result = InputInfo(A, BaseInput);
|
2013-02-05 15:29:57 +08:00
|
|
|
else
|
2016-01-12 07:15:21 +08:00
|
|
|
Result = InputInfo(A, GetNamedOutputPath(C, *JA, BaseInput, BoundArch,
|
[CUDA][OpenMP] Create generic offload action
Summary:
This patch replaces the CUDA-specific action with a generic offload action. The offload action may have multiple dependences, classified as “host” and “device”. The way this generic offloading action is used is very similar to what is done today by the CUDA implementation: it is used to set a specific toolchain and architecture for its dependences during the generation of jobs.
This patch also proposes propagating the offloading information through the action graph so that that information can be easily retrieved at any time during the generation of commands. This allows, e.g., the “clang tool” to evaluate whether CUDA should be supported for the device or host, and ptxas to easily retrieve the target architecture.
This is an example of what the action graphs would look like (compilation of a single CUDA file with two GPU architectures):
```
0: input, "cudatests.cu", cuda, (host-cuda)
1: preprocessor, {0}, cuda-cpp-output, (host-cuda)
2: compiler, {1}, ir, (host-cuda)
3: input, "cudatests.cu", cuda, (device-cuda, sm_35)
4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_35)
5: compiler, {4}, ir, (device-cuda, sm_35)
6: backend, {5}, assembler, (device-cuda, sm_35)
7: assembler, {6}, object, (device-cuda, sm_35)
8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {7}, object
9: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {6}, assembler
10: input, "cudatests.cu", cuda, (device-cuda, sm_37)
11: preprocessor, {10}, cuda-cpp-output, (device-cuda, sm_37)
12: compiler, {11}, ir, (device-cuda, sm_37)
13: backend, {12}, assembler, (device-cuda, sm_37)
14: assembler, {13}, object, (device-cuda, sm_37)
15: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {14}, object
16: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {13}, assembler
17: linker, {8, 9, 15, 16}, cuda-fatbin, (device-cuda)
18: offload, "host-cuda (powerpc64le-unknown-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda)" {17}, ir
19: backend, {18}, assembler
20: assembler, {19}, object
21: input, "cuda", object
22: input, "cudart", object
23: linker, {20, 21, 22}, image
```
The changes in this patch pass the existent regression tests (keeps the existent functionality) and resulting binaries execute correctly in a Power8+K40 machine.
Reviewers: echristo, hfinkel, jlebar, ABataev, tra
Subscribers: guansong, andreybokhanko, tcramer, mkuron, cfe-commits, arpith-jacob, carlo.bertolli, caomhin
Differential Revision: https://reviews.llvm.org/D18171
llvm-svn: 275645
2016-07-16 07:13:27 +08:00
|
|
|
AtTopLevel, MultipleArchs,
|
|
|
|
TC->getTriple().normalize()),
|
2016-01-12 07:15:21 +08:00
|
|
|
BaseInput);
|
2009-03-18 01:53:55 +08:00
|
|
|
|
2011-08-03 01:58:04 +08:00
|
|
|
if (CCCPrintBindings && !CCGenDiagnostics) {
|
2013-03-24 23:06:53 +08:00
|
|
|
llvm::errs() << "# \"" << T->getToolChain().getTripleString() << '"'
|
|
|
|
<< " - \"" << T->getName() << "\", inputs: [";
|
2009-03-18 06:47:06 +08:00
|
|
|
for (unsigned i = 0, e = InputInfos.size(); i != e; ++i) {
|
|
|
|
llvm::errs() << InputInfos[i].getAsString();
|
|
|
|
if (i + 1 != e)
|
|
|
|
llvm::errs() << ", ";
|
|
|
|
}
|
|
|
|
llvm::errs() << "], output: " << Result.getAsString() << "\n";
|
|
|
|
} else {
|
2016-07-28 07:01:55 +08:00
|
|
|
T->ConstructJob(C, *JA, Result, InputInfos,
|
2013-03-24 23:06:53 +08:00
|
|
|
C.getArgsForToolChain(TC, BoundArch), LinkingOutput);
|
2009-03-18 06:47:06 +08:00
|
|
|
}
|
2016-01-12 07:09:32 +08:00
|
|
|
return Result;
|
2009-03-16 14:56:51 +08:00
|
|
|
}
|
|
|
|
|
2015-01-10 01:38:53 +08:00
|
|
|
const char *Driver::getDefaultImageName() const {
|
|
|
|
llvm::Triple Target(llvm::Triple::normalize(DefaultTargetTriple));
|
|
|
|
return Target.isOSWindows() ? "a.exe" : "a.out";
|
|
|
|
}
|
|
|
|
|
2013-10-18 00:16:23 +08:00
|
|
|
/// \brief Create output filename based on ArgValue, which could either be a
|
|
|
|
/// full filename, filename without extension, or a directory. If ArgValue
|
|
|
|
/// does not provide a filename, then use BaseName, and use the extension
|
|
|
|
/// suitable for FileType.
|
2013-08-13 05:56:42 +08:00
|
|
|
static const char *MakeCLOutputFilename(const ArgList &Args, StringRef ArgValue,
|
2015-06-26 23:47:46 +08:00
|
|
|
StringRef BaseName,
|
|
|
|
types::ID FileType) {
|
2013-08-13 05:56:42 +08:00
|
|
|
SmallString<128> Filename = ArgValue;
|
2014-06-27 04:59:36 +08:00
|
|
|
|
2013-09-11 04:18:04 +08:00
|
|
|
if (ArgValue.empty()) {
|
|
|
|
// If the argument is empty, output to BaseName in the current dir.
|
|
|
|
Filename = BaseName;
|
|
|
|
} else if (llvm::sys::path::is_separator(Filename.back())) {
|
2013-08-13 05:56:42 +08:00
|
|
|
// If the argument is a directory, output to BaseName in that dir.
|
|
|
|
llvm::sys::path::append(Filename, BaseName);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!llvm::sys::path::has_extension(ArgValue)) {
|
|
|
|
// If the argument didn't provide an extension, then set it.
|
|
|
|
const char *Extension = types::getTypeTempSuffix(FileType, true);
|
2013-09-11 04:18:04 +08:00
|
|
|
|
|
|
|
if (FileType == types::TY_Image &&
|
|
|
|
Args.hasArg(options::OPT__SLASH_LD, options::OPT__SLASH_LDd)) {
|
|
|
|
// The output file is a dll.
|
|
|
|
Extension = "dll";
|
|
|
|
}
|
|
|
|
|
2013-08-13 05:56:42 +08:00
|
|
|
llvm::sys::path::replace_extension(Filename, Extension);
|
|
|
|
}
|
|
|
|
|
|
|
|
return Args.MakeArgString(Filename.c_str());
|
|
|
|
}
|
|
|
|
|
2015-06-26 23:47:46 +08:00
|
|
|
const char *Driver::GetNamedOutputPath(Compilation &C, const JobAction &JA,
|
2009-03-18 01:53:55 +08:00
|
|
|
const char *BaseInput,
|
2016-10-08 05:27:26 +08:00
|
|
|
StringRef BoundArch, bool AtTopLevel,
|
[CUDA][OpenMP] Create generic offload action
Summary:
This patch replaces the CUDA-specific action with a generic offload action. The offload action may have multiple dependences, classified as “host” and “device”. The way this generic offloading action is used is very similar to what is done today by the CUDA implementation: it is used to set a specific toolchain and architecture for its dependences during the generation of jobs.
This patch also proposes propagating the offloading information through the action graph so that that information can be easily retrieved at any time during the generation of commands. This allows, e.g., the “clang tool” to evaluate whether CUDA should be supported for the device or host, and ptxas to easily retrieve the target architecture.
This is an example of what the action graphs would look like (compilation of a single CUDA file with two GPU architectures):
```
0: input, "cudatests.cu", cuda, (host-cuda)
1: preprocessor, {0}, cuda-cpp-output, (host-cuda)
2: compiler, {1}, ir, (host-cuda)
3: input, "cudatests.cu", cuda, (device-cuda, sm_35)
4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_35)
5: compiler, {4}, ir, (device-cuda, sm_35)
6: backend, {5}, assembler, (device-cuda, sm_35)
7: assembler, {6}, object, (device-cuda, sm_35)
8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {7}, object
9: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {6}, assembler
10: input, "cudatests.cu", cuda, (device-cuda, sm_37)
11: preprocessor, {10}, cuda-cpp-output, (device-cuda, sm_37)
12: compiler, {11}, ir, (device-cuda, sm_37)
13: backend, {12}, assembler, (device-cuda, sm_37)
14: assembler, {13}, object, (device-cuda, sm_37)
15: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {14}, object
16: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {13}, assembler
17: linker, {8, 9, 15, 16}, cuda-fatbin, (device-cuda)
18: offload, "host-cuda (powerpc64le-unknown-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda)" {17}, ir
19: backend, {18}, assembler
20: assembler, {19}, object
21: input, "cuda", object
22: input, "cudart", object
23: linker, {20, 21, 22}, image
```
The changes in this patch pass the existent regression tests (keeps the existent functionality) and resulting binaries execute correctly in a Power8+K40 machine.
Reviewers: echristo, hfinkel, jlebar, ABataev, tra
Subscribers: guansong, andreybokhanko, tcramer, mkuron, cfe-commits, arpith-jacob, carlo.bertolli, caomhin
Differential Revision: https://reviews.llvm.org/D18171
llvm-svn: 275645
2016-07-16 07:13:27 +08:00
|
|
|
bool MultipleArchs,
|
|
|
|
StringRef NormalizedTriple) const {
|
2009-03-18 09:38:48 +08:00
|
|
|
llvm::PrettyStackTraceString CrashInfo("Computing output path");
|
2009-03-18 01:53:55 +08:00
|
|
|
// Output to a user requested destination?
|
2015-06-26 23:47:46 +08:00
|
|
|
if (AtTopLevel && !isa<DsymutilJobAction>(JA) && !isa<VerifyJobAction>(JA)) {
|
2009-03-18 01:53:55 +08:00
|
|
|
if (Arg *FinalOutput = C.getArgs().getLastArg(options::OPT_o))
|
2013-01-25 03:14:47 +08:00
|
|
|
return C.addResultFile(FinalOutput->getValue(), &JA);
|
2009-03-18 01:53:55 +08:00
|
|
|
}
|
|
|
|
|
2013-12-21 02:40:46 +08:00
|
|
|
// For /P, preprocess to file named after BaseInput.
|
|
|
|
if (C.getArgs().hasArg(options::OPT__SLASH_P)) {
|
|
|
|
assert(AtTopLevel && isa<PreprocessJobAction>(JA));
|
|
|
|
StringRef BaseName = llvm::sys::path::filename(BaseInput);
|
2014-06-17 08:19:12 +08:00
|
|
|
StringRef NameArg;
|
2015-06-09 18:24:06 +08:00
|
|
|
if (Arg *A = C.getArgs().getLastArg(options::OPT__SLASH_Fi))
|
2014-06-17 08:19:12 +08:00
|
|
|
NameArg = A->getValue();
|
2015-06-26 23:47:46 +08:00
|
|
|
return C.addResultFile(
|
|
|
|
MakeCLOutputFilename(C.getArgs(), NameArg, BaseName, types::TY_PP_C),
|
|
|
|
&JA);
|
2013-12-21 02:40:46 +08:00
|
|
|
}
|
|
|
|
|
2010-09-24 08:46:53 +08:00
|
|
|
// Default to writing to stdout?
|
2013-03-28 00:47:18 +08:00
|
|
|
if (AtTopLevel && !CCGenDiagnostics &&
|
|
|
|
(isa<PreprocessJobAction>(JA) || JA.getType() == types::TY_ModuleFile))
|
2010-09-24 08:46:53 +08:00
|
|
|
return "-";
|
|
|
|
|
2013-10-18 00:16:23 +08:00
|
|
|
// Is this the assembly listing for /FA?
|
|
|
|
if (JA.getType() == types::TY_PP_Asm &&
|
|
|
|
(C.getArgs().hasArg(options::OPT__SLASH_FA) ||
|
|
|
|
C.getArgs().hasArg(options::OPT__SLASH_Fa))) {
|
|
|
|
// Use /Fa and the input filename to determine the asm file name.
|
|
|
|
StringRef BaseName = llvm::sys::path::filename(BaseInput);
|
|
|
|
StringRef FaValue = C.getArgs().getLastArgValue(options::OPT__SLASH_Fa);
|
2015-06-26 23:47:46 +08:00
|
|
|
return C.addResultFile(
|
|
|
|
MakeCLOutputFilename(C.getArgs(), FaValue, BaseName, JA.getType()),
|
|
|
|
&JA);
|
2013-10-18 00:16:23 +08:00
|
|
|
}
|
|
|
|
|
2009-03-18 01:53:55 +08:00
|
|
|
// Output to a temporary file?
|
2015-02-03 06:41:48 +08:00
|
|
|
if ((!AtTopLevel && !isSaveTempsEnabled() &&
|
2015-06-26 23:47:46 +08:00
|
|
|
!C.getArgs().hasArg(options::OPT__SLASH_Fo)) ||
|
2011-08-03 01:58:04 +08:00
|
|
|
CCGenDiagnostics) {
|
2011-08-27 06:27:02 +08:00
|
|
|
StringRef Name = llvm::sys::path::filename(BaseInput);
|
|
|
|
std::pair<StringRef, StringRef> Split = Name.split('.');
|
2015-06-26 23:47:46 +08:00
|
|
|
std::string TmpName = GetTemporaryPath(
|
|
|
|
Split.first, types::getTypeTempSuffix(JA.getType(), IsCLMode()));
|
2009-03-19 03:34:39 +08:00
|
|
|
return C.addTempFile(C.getArgs().MakeArgString(TmpName.c_str()));
|
2009-03-18 01:53:55 +08:00
|
|
|
}
|
|
|
|
|
2012-02-05 10:13:05 +08:00
|
|
|
SmallString<128> BasePath(BaseInput);
|
2011-07-23 18:55:15 +08:00
|
|
|
StringRef BaseName;
|
2011-03-26 02:16:51 +08:00
|
|
|
|
|
|
|
// Dsymutil actions should use the full path.
|
2011-08-24 01:56:55 +08:00
|
|
|
if (isa<DsymutilJobAction>(JA) || isa<VerifyJobAction>(JA))
|
2011-03-26 02:16:51 +08:00
|
|
|
BaseName = BasePath;
|
|
|
|
else
|
|
|
|
BaseName = llvm::sys::path::filename(BasePath);
|
2009-03-18 01:53:55 +08:00
|
|
|
|
|
|
|
// Determine what the derived output name should be.
|
|
|
|
const char *NamedOutput;
|
2013-08-07 06:11:28 +08:00
|
|
|
|
2016-10-05 05:01:04 +08:00
|
|
|
if ((JA.getType() == types::TY_Object || JA.getType() == types::TY_LTO_BC) &&
|
2014-09-12 02:16:21 +08:00
|
|
|
C.getArgs().hasArg(options::OPT__SLASH_Fo, options::OPT__SLASH_o)) {
|
|
|
|
// The /Fo or /o flag decides the object filename.
|
2015-06-26 23:47:46 +08:00
|
|
|
StringRef Val =
|
|
|
|
C.getArgs()
|
|
|
|
.getLastArg(options::OPT__SLASH_Fo, options::OPT__SLASH_o)
|
|
|
|
->getValue();
|
|
|
|
NamedOutput =
|
|
|
|
MakeCLOutputFilename(C.getArgs(), Val, BaseName, types::TY_Object);
|
2013-08-13 05:56:42 +08:00
|
|
|
} else if (JA.getType() == types::TY_Image &&
|
2015-06-26 23:47:46 +08:00
|
|
|
C.getArgs().hasArg(options::OPT__SLASH_Fe,
|
|
|
|
options::OPT__SLASH_o)) {
|
2014-09-12 02:16:21 +08:00
|
|
|
// The /Fe or /o flag names the linked file.
|
2015-06-26 23:47:46 +08:00
|
|
|
StringRef Val =
|
|
|
|
C.getArgs()
|
|
|
|
.getLastArg(options::OPT__SLASH_Fe, options::OPT__SLASH_o)
|
|
|
|
->getValue();
|
|
|
|
NamedOutput =
|
|
|
|
MakeCLOutputFilename(C.getArgs(), Val, BaseName, types::TY_Image);
|
2013-09-11 04:18:04 +08:00
|
|
|
} else if (JA.getType() == types::TY_Image) {
|
2013-08-13 05:56:42 +08:00
|
|
|
if (IsCLMode()) {
|
|
|
|
// clang-cl uses BaseName for the executable name.
|
2015-06-26 23:47:46 +08:00
|
|
|
NamedOutput =
|
|
|
|
MakeCLOutputFilename(C.getArgs(), "", BaseName, types::TY_Image);
|
2016-10-08 05:27:26 +08:00
|
|
|
} else if (MultipleArchs && !BoundArch.empty()) {
|
2015-01-10 01:38:53 +08:00
|
|
|
SmallString<128> Output(getDefaultImageName());
|
[CUDA][OpenMP] Create generic offload action
Summary:
This patch replaces the CUDA-specific action with a generic offload action. The offload action may have multiple dependences, classified as “host” and “device”. The way this generic offloading action is used is very similar to what is done today by the CUDA implementation: it is used to set a specific toolchain and architecture for its dependences during the generation of jobs.
This patch also proposes propagating the offloading information through the action graph so that that information can be easily retrieved at any time during the generation of commands. This allows, e.g., the “clang tool” to evaluate whether CUDA should be supported for the device or host, and ptxas to easily retrieve the target architecture.
This is an example of what the action graphs would look like (compilation of a single CUDA file with two GPU architectures):
```
0: input, "cudatests.cu", cuda, (host-cuda)
1: preprocessor, {0}, cuda-cpp-output, (host-cuda)
2: compiler, {1}, ir, (host-cuda)
3: input, "cudatests.cu", cuda, (device-cuda, sm_35)
4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_35)
5: compiler, {4}, ir, (device-cuda, sm_35)
6: backend, {5}, assembler, (device-cuda, sm_35)
7: assembler, {6}, object, (device-cuda, sm_35)
8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {7}, object
9: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {6}, assembler
10: input, "cudatests.cu", cuda, (device-cuda, sm_37)
11: preprocessor, {10}, cuda-cpp-output, (device-cuda, sm_37)
12: compiler, {11}, ir, (device-cuda, sm_37)
13: backend, {12}, assembler, (device-cuda, sm_37)
14: assembler, {13}, object, (device-cuda, sm_37)
15: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {14}, object
16: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {13}, assembler
17: linker, {8, 9, 15, 16}, cuda-fatbin, (device-cuda)
18: offload, "host-cuda (powerpc64le-unknown-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda)" {17}, ir
19: backend, {18}, assembler
20: assembler, {19}, object
21: input, "cuda", object
22: input, "cudart", object
23: linker, {20, 21, 22}, image
```
The changes in this patch pass the existent regression tests (keeps the existent functionality) and resulting binaries execute correctly in a Power8+K40 machine.
Reviewers: echristo, hfinkel, jlebar, ABataev, tra
Subscribers: guansong, andreybokhanko, tcramer, mkuron, cfe-commits, arpith-jacob, carlo.bertolli, caomhin
Differential Revision: https://reviews.llvm.org/D18171
llvm-svn: 275645
2016-07-16 07:13:27 +08:00
|
|
|
Output += JA.getOffloadingFileNamePrefix(NormalizedTriple);
|
2013-05-01 06:01:21 +08:00
|
|
|
Output += "-";
|
|
|
|
Output.append(BoundArch);
|
|
|
|
NamedOutput = C.getArgs().MakeArgString(Output.c_str());
|
clang-cl: Implement initial limited support for precompiled headers.
In the gcc precompiled header model, one explicitly runs clang with `-x
c++-header` on a .h file to produce a gch file, and then includes the header
with `-include foo.h` and if a .gch file exists for that header it gets used.
This is documented at
http://clang.llvm.org/docs/UsersManual.html#precompiled-headers
cl.exe's model is fairly different, and controlled by the two flags /Yc and
/Yu. A pch file is generated as a side effect of a regular compilation when
/Ycheader.h is passed. While the compilation is running, the compiler keeps
track of #include lines in the main translation unit and writes everything up
to an `#include "header.h"` line into a pch file. Conversely, /Yuheader.h tells
the compiler to skip all code in the main TU up to and including `#include
"header.h"` and instead load header.pch. (It's also possible to use /Yc and /Yu
without an argument; in that case a `#pragma hdrstop` takes the role of
controlling the point where pch ends and real code begins.)
This patch implements limited support for this in that it requires the pch
header to be passed as a /FI force include flag – with this restriction,
it can be implemented almost completely in the driver with fairly small amounts
of code. For /Yu, this is trivial, and for /Yc a separate pch action is added
that runs before the actual compilation. After r261774, the first failing
command makes a compilation stop – this means if the pch fails to build the
main compilation won't run, which is what we want. However, in /fallback builds
we need to run the main compilation even if the pch build fails so that the
main compilation's fallback can run. To achieve this, add a ForceSuccessCommand
that pretends that the pch build always succeeded in /fallback builds (the main
compilation will then fail to open the pch and run the fallback cl.exe
invocation).
If /Yc /Yu are used in a setup that clang-cl doesn't implement yet, clang-cl
will now emit a "not implemented yet; flag ignored" warning that can be
disabled using -Wno-clang-cl-pch.
Since clang-cl doesn't yet serialize some important things (most notably
`pragma comment(lib, ...)`), this feature is disabled by default and only
enabled by an internal driver flag. Once it's more stable, this internal flag
will disappear.
(The default stdafx.h setup passes stdafx.h as explicit argument to /Yc but not
as /FI – instead every single TU has to `#include <stdafx.h>` as first thing it
does. Implementing support for this should be possible with the approach in
this patch with minimal frontend changes by passing a --stop-at / --start-at
flag from the driver to the frontend. This is left for a follow-up. I don't
think we ever want to support `#pragma hdrstop`, and supporting it with this
approach isn't easy: This approach relies on the driver knowing the pch
filename in advance, and `#pragma hdrstop(out.pch)` can set the output
filename, so the driver can't know about it in advance.)
clang-cl now also honors /Fp and puts pch files in the same spot that cl.exe
would put them, but the pch file format is of course incompatible. This has
ramifications on /fallback, so /Yc /Yu aren't passed through to cl.exe in
/fallback builds.
http://reviews.llvm.org/D17695
llvm-svn: 262420
2016-03-02 07:16:44 +08:00
|
|
|
} else {
|
2015-01-10 01:38:53 +08:00
|
|
|
NamedOutput = getDefaultImageName();
|
clang-cl: Implement initial limited support for precompiled headers.
In the gcc precompiled header model, one explicitly runs clang with `-x
c++-header` on a .h file to produce a gch file, and then includes the header
with `-include foo.h` and if a .gch file exists for that header it gets used.
This is documented at
http://clang.llvm.org/docs/UsersManual.html#precompiled-headers
cl.exe's model is fairly different, and controlled by the two flags /Yc and
/Yu. A pch file is generated as a side effect of a regular compilation when
/Ycheader.h is passed. While the compilation is running, the compiler keeps
track of #include lines in the main translation unit and writes everything up
to an `#include "header.h"` line into a pch file. Conversely, /Yuheader.h tells
the compiler to skip all code in the main TU up to and including `#include
"header.h"` and instead load header.pch. (It's also possible to use /Yc and /Yu
without an argument; in that case a `#pragma hdrstop` takes the role of
controlling the point where pch ends and real code begins.)
This patch implements limited support for this in that it requires the pch
header to be passed as a /FI force include flag – with this restriction,
it can be implemented almost completely in the driver with fairly small amounts
of code. For /Yu, this is trivial, and for /Yc a separate pch action is added
that runs before the actual compilation. After r261774, the first failing
command makes a compilation stop – this means if the pch fails to build the
main compilation won't run, which is what we want. However, in /fallback builds
we need to run the main compilation even if the pch build fails so that the
main compilation's fallback can run. To achieve this, add a ForceSuccessCommand
that pretends that the pch build always succeeded in /fallback builds (the main
compilation will then fail to open the pch and run the fallback cl.exe
invocation).
If /Yc /Yu are used in a setup that clang-cl doesn't implement yet, clang-cl
will now emit a "not implemented yet; flag ignored" warning that can be
disabled using -Wno-clang-cl-pch.
Since clang-cl doesn't yet serialize some important things (most notably
`pragma comment(lib, ...)`), this feature is disabled by default and only
enabled by an internal driver flag. Once it's more stable, this internal flag
will disappear.
(The default stdafx.h setup passes stdafx.h as explicit argument to /Yc but not
as /FI – instead every single TU has to `#include <stdafx.h>` as first thing it
does. Implementing support for this should be possible with the approach in
this patch with minimal frontend changes by passing a --stop-at / --start-at
flag from the driver to the frontend. This is left for a follow-up. I don't
think we ever want to support `#pragma hdrstop`, and supporting it with this
approach isn't easy: This approach relies on the driver knowing the pch
filename in advance, and `#pragma hdrstop(out.pch)` can set the output
filename, so the driver can't know about it in advance.)
clang-cl now also honors /Fp and puts pch files in the same spot that cl.exe
would put them, but the pch file format is of course incompatible. This has
ramifications on /fallback, so /Yc /Yu aren't passed through to cl.exe in
/fallback builds.
http://reviews.llvm.org/D17695
llvm-svn: 262420
2016-03-02 07:16:44 +08:00
|
|
|
}
|
|
|
|
} else if (JA.getType() == types::TY_PCH && IsCLMode()) {
|
|
|
|
NamedOutput = C.getArgs().MakeArgString(GetClPchPath(C, BaseName).c_str());
|
2009-03-18 01:53:55 +08:00
|
|
|
} else {
|
2013-09-06 01:05:56 +08:00
|
|
|
const char *Suffix = types::getTypeTempSuffix(JA.getType(), IsCLMode());
|
2009-03-18 01:53:55 +08:00
|
|
|
assert(Suffix && "All types used for output should have a suffix.");
|
|
|
|
|
|
|
|
std::string::size_type End = std::string::npos;
|
|
|
|
if (!types::appendSuffixForType(JA.getType()))
|
|
|
|
End = BaseName.rfind('.');
|
2013-05-01 06:01:21 +08:00
|
|
|
SmallString<128> Suffixed(BaseName.substr(0, End));
|
[CUDA][OpenMP] Create generic offload action
Summary:
This patch replaces the CUDA specific action by a generic offload action. The offload action may have multiple dependences classified as “host” and “device”. The way this generic offloading action is used is very similar to what is done today by the CUDA implementation: it is used to set a specific toolchain and architecture to its dependences during the generation of jobs.
This patch also proposes propagating the offloading information through the action graph so that that information can be easily retrieved at any time during the generation of commands. This allows e.g. the “clang tool” to evaluate whether CUDA should be supported for the device or host and ptxas to easily retrieve the target architecture.
This is an example of what the action graphs would look like (compilation of a single CUDA file with two GPU architectures):
```
0: input, "cudatests.cu", cuda, (host-cuda)
1: preprocessor, {0}, cuda-cpp-output, (host-cuda)
2: compiler, {1}, ir, (host-cuda)
3: input, "cudatests.cu", cuda, (device-cuda, sm_35)
4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_35)
5: compiler, {4}, ir, (device-cuda, sm_35)
6: backend, {5}, assembler, (device-cuda, sm_35)
7: assembler, {6}, object, (device-cuda, sm_35)
8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {7}, object
9: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {6}, assembler
10: input, "cudatests.cu", cuda, (device-cuda, sm_37)
11: preprocessor, {10}, cuda-cpp-output, (device-cuda, sm_37)
12: compiler, {11}, ir, (device-cuda, sm_37)
13: backend, {12}, assembler, (device-cuda, sm_37)
14: assembler, {13}, object, (device-cuda, sm_37)
15: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {14}, object
16: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {13}, assembler
17: linker, {8, 9, 15, 16}, cuda-fatbin, (device-cuda)
18: offload, "host-cuda (powerpc64le-unknown-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda)" {17}, ir
19: backend, {18}, assembler
20: assembler, {19}, object
21: input, "cuda", object
22: input, "cudart", object
23: linker, {20, 21, 22}, image
```
The changes in this patch pass the existing regression tests (keeping the existing functionality), and the resulting binaries execute correctly on a Power8+K40 machine.
Reviewers: echristo, hfinkel, jlebar, ABataev, tra
Subscribers: guansong, andreybokhanko, tcramer, mkuron, cfe-commits, arpith-jacob, carlo.bertolli, caomhin
Differential Revision: https://reviews.llvm.org/D18171
llvm-svn: 275645
2016-07-16 07:13:27 +08:00
|
|
|
Suffixed += JA.getOffloadingFileNamePrefix(NormalizedTriple);
|
2016-10-08 05:27:26 +08:00
|
|
|
if (MultipleArchs && !BoundArch.empty()) {
|
2013-05-01 06:01:21 +08:00
|
|
|
Suffixed += "-";
|
|
|
|
Suffixed.append(BoundArch);
|
|
|
|
}
|
Reapply "Change -save-temps to emit unoptimized bitcode files."
This reapplies r224503 along with a fix for compiling Fortran by having the
clang driver invoke gcc (see r224546, where it was reverted). I have added
a testcase for that as well.
Original commit message:
It is often convenient to use -save-temps to collect the intermediate
results of a compilation, e.g., when triaging a bug report. Besides the
temporary files for preprocessed source and assembly code, this adds the
unoptimized bitcode files as well.
This adds a new BackendJobAction, which is mostly mechanical, to run after
the CompileJobAction. When not using -save-temps, the BackendJobAction is
combined into one job with the CompileJobAction, similar to the way the
integrated assembler is handled. I've implemented this entirely as a
driver change, so under the hood, it is just using -disable-llvm-optzns
to get the unoptimized bitcode.
Based in part on a patch by Steven Wu.
rdar://problem/18909437
llvm-svn: 224688
2014-12-21 15:00:00 +08:00
|
|
|
// When using both -save-temps and -emit-llvm, use a ".tmp.bc" suffix for
|
|
|
|
// the unoptimized bitcode so that it does not get overwritten by the ".bc"
|
|
|
|
// optimized bitcode output.
|
|
|
|
if (!AtTopLevel && C.getArgs().hasArg(options::OPT_emit_llvm) &&
|
|
|
|
JA.getType() == types::TY_LLVM_BC)
|
|
|
|
Suffixed += ".tmp";
|
2009-03-18 01:53:55 +08:00
|
|
|
Suffixed += '.';
|
|
|
|
Suffixed += Suffix;
|
|
|
|
NamedOutput = C.getArgs().MakeArgString(Suffixed.c_str());
|
|
|
|
}
|
|
|
|
|
2015-02-03 06:41:48 +08:00
|
|
|
// Prepend object file path if -save-temps=obj
|
|
|
|
if (!AtTopLevel && isSaveTempsObj() && C.getArgs().hasArg(options::OPT_o) &&
|
|
|
|
JA.getType() != types::TY_PCH) {
|
|
|
|
Arg *FinalOutput = C.getArgs().getLastArg(options::OPT_o);
|
|
|
|
SmallString<128> TempPath(FinalOutput->getValue());
|
|
|
|
llvm::sys::path::remove_filename(TempPath);
|
|
|
|
StringRef OutputFileName = llvm::sys::path::filename(NamedOutput);
|
|
|
|
llvm::sys::path::append(TempPath, OutputFileName);
|
|
|
|
NamedOutput = C.getArgs().MakeArgString(TempPath.c_str());
|
|
|
|
}
|
|
|
|
|
2012-07-10 01:31:28 +08:00
|
|
|
// If we're saving temps and the temp file conflicts with the input file,
|
2012-04-21 04:05:08 +08:00
|
|
|
// then avoid overwriting input file.
|
2015-02-03 06:41:48 +08:00
|
|
|
if (!AtTopLevel && isSaveTempsEnabled() && NamedOutput == BaseName) {
|
2012-04-21 04:05:08 +08:00
|
|
|
bool SameFile = false;
|
|
|
|
SmallString<256> Result;
|
|
|
|
llvm::sys::fs::current_path(Result);
|
|
|
|
llvm::sys::path::append(Result, BaseName);
|
|
|
|
llvm::sys::fs::equivalent(BaseInput, Result.c_str(), SameFile);
|
|
|
|
// Must share the same path to conflict.
|
|
|
|
if (SameFile) {
|
|
|
|
StringRef Name = llvm::sys::path::filename(BaseInput);
|
|
|
|
std::pair<StringRef, StringRef> Split = Name.split('.');
|
2015-06-26 23:47:46 +08:00
|
|
|
std::string TmpName = GetTemporaryPath(
|
|
|
|
Split.first, types::getTypeTempSuffix(JA.getType(), IsCLMode()));
|
2012-04-21 04:05:08 +08:00
|
|
|
return C.addTempFile(C.getArgs().MakeArgString(TmpName.c_str()));
|
|
|
|
}
|
2011-07-16 05:54:29 +08:00
|
|
|
}
|
|
|
|
|
2009-09-09 07:36:43 +08:00
|
|
|
// As an annoying special case, PCH generation doesn't strip the pathname.
|
2016-03-03 07:29:29 +08:00
|
|
|
if (JA.getType() == types::TY_PCH && !IsCLMode()) {
|
2010-12-18 08:19:12 +08:00
|
|
|
llvm::sys::path::remove_filename(BasePath);
|
|
|
|
if (BasePath.empty())
|
2009-03-18 17:58:30 +08:00
|
|
|
BasePath = NamedOutput;
|
|
|
|
else
|
2010-12-18 08:19:12 +08:00
|
|
|
llvm::sys::path::append(BasePath, NamedOutput);
|
2013-01-25 03:14:47 +08:00
|
|
|
return C.addResultFile(C.getArgs().MakeArgString(BasePath.c_str()), &JA);
|
2009-03-18 01:53:55 +08:00
|
|
|
} else {
|
2013-01-25 03:14:47 +08:00
|
|
|
return C.addResultFile(NamedOutput, &JA);
|
2009-03-18 01:53:55 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-10-06 13:11:48 +08:00
|
|
|
/// Locate the file \p Name and return the first candidate path that exists.
///
/// Candidates are tried in this order: every entry of PrefixDirs, then
/// ResourceDir, then every entry of TC.getFilePaths(). When none of them
/// exists, \p Name is returned unchanged.
std::string Driver::GetFilePath(StringRef Name, const ToolChain &TC) const {
  // Probe a list of directories for Name. Empty entries are skipped and a
  // leading '=' in an entry is replaced by SysRoot. Yields the first existing
  // path, or an empty string when nothing matched (a real match is never
  // empty because it is built from a non-empty directory entry).
  auto FindIn = [&](ArrayRef<std::string> Dirs) -> std::string {
    for (const std::string &Dir : Dirs) {
      if (Dir.empty())
        continue;
      SmallString<128> Candidate(Dir[0] == '=' ? SysRoot + Dir.substr(1) : Dir);
      llvm::sys::path::append(Candidate, Name);
      if (llvm::sys::fs::exists(Twine(Candidate)))
        return Candidate.str();
    }
    return std::string();
  };

  // Respect a limited subset of the '-Bprefix' functionality in GCC by
  // attempting to use this prefix when looking for file paths.
  std::string FromPrefix = FindIn(PrefixDirs);
  if (!FromPrefix.empty())
    return FromPrefix;

  // The resource directory is used verbatim: no empty check and no '='
  // substitution is applied to it.
  SmallString<128> InResourceDir(ResourceDir);
  llvm::sys::path::append(InResourceDir, Name);
  if (llvm::sys::fs::exists(Twine(InResourceDir)))
    return InResourceDir.str();

  std::string FromToolChain = FindIn(TC.getFilePaths());
  if (!FromToolChain.empty())
    return FromToolChain;

  return Name;
}
|
|
|
|
|
2015-06-26 23:47:46 +08:00
|
|
|
void Driver::generatePrefixedToolNames(
|
2016-10-06 13:11:48 +08:00
|
|
|
StringRef Tool, const ToolChain &TC,
|
2015-06-26 23:47:46 +08:00
|
|
|
SmallVectorImpl<std::string> &Names) const {
|
2014-10-26 07:33:21 +08:00
|
|
|
// FIXME: Needs a better variable than DefaultTargetTriple
|
2016-10-06 13:11:48 +08:00
|
|
|
Names.emplace_back((DefaultTargetTriple + "-" + Tool).str());
|
2015-05-30 03:42:19 +08:00
|
|
|
Names.emplace_back(Tool);
|
2015-11-12 23:26:54 +08:00
|
|
|
|
|
|
|
// Allow the discovery of tools prefixed with LLVM's default target triple.
|
|
|
|
std::string LLVMDefaultTargetTriple = llvm::sys::getDefaultTargetTriple();
|
|
|
|
if (LLVMDefaultTargetTriple != DefaultTargetTriple)
|
2016-10-06 13:11:48 +08:00
|
|
|
Names.emplace_back((LLVMDefaultTargetTriple + "-" + Tool).str());
|
2014-10-26 07:33:21 +08:00
|
|
|
}
|
|
|
|
|
2014-11-05 04:26:01 +08:00
|
|
|
/// Try each entry of \p Names, in order, as a file inside \p Dir.
///
/// Each name is appended to \p Dir and tested with can_execute. On the first
/// hit the function returns true and \p Dir is left holding the path that
/// passed the test. After a miss the trailing component is stripped again with
/// remove_filename before the next name is tried. Returns false when no name
/// is executable.
static bool ScanDirForExecutable(SmallString<128> &Dir,
                                 ArrayRef<std::string> Names) {
  for (const std::string &Candidate : Names) {
    llvm::sys::path::append(Dir, Candidate);
    if (!llvm::sys::fs::can_execute(Twine(Dir))) {
      // Not runnable: drop what was just appended and move on.
      llvm::sys::path::remove_filename(Dir);
      continue;
    }
    return true;
  }
  return false;
}
|
|
|
|
|
2016-10-06 13:11:48 +08:00
|
|
|
/// Locate the program \p Name and return the path of the first executable
/// match, or \p Name unchanged when nothing was found.
///
/// The candidate file names come from generatePrefixedToolNames(). Lookup
/// order: the entries of PrefixDirs, then TC.getProgramPaths(), then
/// llvm::sys::findProgramByName() for each candidate name.
std::string Driver::GetProgramPath(StringRef Name, const ToolChain &TC) const {
  SmallVector<std::string, 2> CandidateNames;
  generatePrefixedToolNames(Name, TC, CandidateNames);

  // Respect a limited subset of the '-Bprefix' functionality in GCC by
  // attempting to use this prefix when looking for program paths.
  for (const auto &Prefix : PrefixDirs) {
    if (!llvm::sys::fs::is_directory(Prefix)) {
      // Not a directory: treat the entry as a raw prefix glued directly in
      // front of Name (no path separator is inserted).
      SmallString<128> Glued((Prefix + Name).str());
      if (llvm::sys::fs::can_execute(Twine(Glued)))
        return Glued.str();
      continue;
    }

    SmallString<128> InPrefixDir(Prefix);
    if (ScanDirForExecutable(InPrefixDir, CandidateNames))
      return InPrefixDir.str();
  }

  // Next, the program directories supplied by the toolchain.
  const ToolChain::path_list &ProgramDirs = TC.getProgramPaths();
  for (const auto &ProgramDir : ProgramDirs) {
    SmallString<128> InProgramDir(ProgramDir);
    if (ScanDirForExecutable(InProgramDir, CandidateNames))
      return InProgramDir.str();
  }

  // If all else failed, search the path.
  for (const auto &CandidateName : CandidateNames) {
    llvm::ErrorOr<std::string> Found =
        llvm::sys::findProgramByName(CandidateName);
    if (Found)
      return *Found;
  }

  return Name;
}
|
|
|
|
|
2016-10-06 13:11:48 +08:00
|
|
|
std::string Driver::GetTemporaryPath(StringRef Prefix, StringRef Suffix) const {
|
2013-06-25 12:26:55 +08:00
|
|
|
SmallString<128> Path;
|
2014-06-12 22:02:15 +08:00
|
|
|
std::error_code EC = llvm::sys::fs::createTemporaryFile(Prefix, Suffix, Path);
|
2013-06-25 12:26:55 +08:00
|
|
|
if (EC) {
|
|
|
|
Diag(clang::diag::err_unable_to_make_temp) << EC.message();
|
2009-03-19 03:34:39 +08:00
|
|
|
return "";
|
|
|
|
}
|
|
|
|
|
2013-06-25 12:26:55 +08:00
|
|
|
return Path.str();
|
clang-cl: Implement initial limited support for precompiled headers.
In the gcc precompiled header model, one explicitly runs clang with `-x
c++-header` on a .h file to produce a gch file, and then includes the header
with `-include foo.h` and if a .gch file exists for that header it gets used.
This is documented at
http://clang.llvm.org/docs/UsersManual.html#precompiled-headers
cl.exe's model is fairly different, and controlled by the two flags /Yc and
/Yu. A pch file is generated as a side effect of a regular compilation when
/Ycheader.h is passed. While the compilation is running, the compiler keeps
track of #include lines in the main translation unit and writes everything up
to an `#include "header.h"` line into a pch file. Conversely, /Yuheader.h tells
the compiler to skip all code in the main TU up to and including `#include
"header.h"` and instead load header.pch. (It's also possible to use /Yc and /Yu
without an argument, in that case a `#pragma hrdstop` takes the role of
controlling the point where pch ends and real code begins.)
This patch implements limited support for this in that it requires the pch
header to be passed as a /FI force include flag – with this restriction,
it can be implemented almost completely in the driver with fairly small amounts
of code. For /Yu, this is trivial, and for /Yc a separate pch action is added
that runs before the actual compilation. After r261774, the first failing
command makes a compilation stop – this means if the pch fails to build the
main compilation won't run, which is what we want. However, in /fallback builds
we need to run the main compilation even if the pch build fails so that the
main compilation's fallback can run. To achieve this, add a ForceSuccessCommand
that pretends that the pch build always succeeded in /fallback builds (the main
compilation will then fail to open the pch and run the fallback cl.exe
invocation).
If /Yc /Yu are used in a setup that clang-cl doesn't implement yet, clang-cl
will now emit a "not implemented yet; flag ignored" warning that can be
disabled using -Wno-clang-cl-pch.
Since clang-cl doesn't yet serialize some important things (most notably
`pragma comment(lib, ...)`, this feature is disabled by default and only
enabled by an internal driver flag. Once it's more stable, this internal flag
will disappear.
(The default stdafx.h setup passes stdafx.h as explicit argument to /Yc but not
as /FI – instead every single TU has to `#include <stdafx.h>` as first thing it
does. Implementing support for this should be possible with the approach in
this patch with minimal frontend changes by passing a --stop-at / --start-at
flag from the driver to the frontend. This is left for a follow-up. I don't
think we ever want to support `#pragma hdrstop`, and supporting it with this
approach isn't easy: This approach relies on the driver knowing the pch
filename in advance, and `#pragma hdrstop(out.pch)` can set the output
filename, so the driver can't know about it in advance.)
clang-cl now also honors /Fp and puts pch files in the same spot that cl.exe
would put them, but the pch file format is of course incompatible. This has
ramifications on /fallback, so /Yc /Yu aren't passed through to cl.exe in
/fallback builds.
http://reviews.llvm.org/D17695
llvm-svn: 262420
2016-03-02 07:16:44 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/// Compute the pch file name used in clang-cl mode.
///
/// With a /Fp argument, its value is used, with ".pch" appended when the value
/// has no extension. Without /Fp, the name is \p BaseName with its extension
/// replaced by ".pch".
std::string Driver::GetClPchPath(Compilation &C, StringRef BaseName) const {
  Arg *FpArg = C.getArgs().getLastArg(options::OPT__SLASH_Fp);
  if (!FpArg) {
    // No explicit /Fp: derive the name from the base name.
    SmallString<128> Derived(BaseName);
    llvm::sys::path::replace_extension(Derived, ".pch");
    return Derived.str();
  }

  // FIXME: If anybody needs it, implement this obscure rule:
  // "If you specify a directory without a file name, the default file name
  // is VCx0.pch., where x is the major version of Visual C++ in use."
  SmallString<128> Requested(FpArg->getValue());

  // "If you do not specify an extension as part of the path name, an
  // extension of .pch is assumed. "
  if (!llvm::sys::path::has_extension(Requested))
    Requested += ".pch";

  return Requested.str();
}
|
|
|
|
|
Delete the driver's HostInfo class. This abstraction just never really
did anything. The two big pieces of functionality it tried to provide
were to cache the ToolChain objects for each target, and to figure out
the exact target based on the flag set coming in to an invocation.
However, it had a lot of flaws even with those goals:
- Neither of these have anything to do with the host, or its info.
- The HostInfo class was setup as a full blown class *hierarchy* with
a separate implementation for each "host" OS. This required
dispatching just to create the objects in the first place.
- The hierarchy claimed to represent the host, when in fact it was
based on the target OS.
- Each leaf in the hierarchy was responsible for implementing the flag
processing and caching, resulting in a *lot* of copy-paste code and
quite a few bugs.
- The caching was consistently done based on architecture alone, even
though *any* aspect of the targeted triple might change the behavior
of the configured toolchain.
- Flag processing was already being done in the Driver proper,
separating the flag handling even more than it already is.
Instead of this, we can simply have the dispatch logic in the Driver
which previously created a HostInfo object create the ToolChain objects.
Adding caching in the Driver layer is a tiny amount of code. Finally,
pulling the flag processing into the Driver puts it where it belongs and
consolidates it in one location.
The result is that two functions, and maybe 100 lines of new code
replace over 10 classes and 800 lines of code. Woot.
This also paves the way to introduce more detailed ToolChain objects for
various OSes without threading through a new HostInfo type as well, and
the accompanying boiler plate. That, of course, was the yak I started to
shave that began this entire refactoring escapade. Wheee!
llvm-svn: 148950
2012-01-25 19:01:57 +08:00
|
|
|
const ToolChain &Driver::getToolChain(const ArgList &Args,
|
2015-07-11 03:47:55 +08:00
|
|
|
const llvm::Triple &Target) const {
|
Delete the driver's HostInfo class. This abstraction just never really
did anything. The two big pieces of functionality it tried to provide
were to cache the ToolChain objects for each target, and to figure out
the exact target based on the flag set coming in to an invocation.
However, it had a lot of flaws even with those goals:
- Neither of these have anything to do with the host, or its info.
- The HostInfo class was set up as a full-blown class *hierarchy* with
a separate implementation for each "host" OS. This required
dispatching just to create the objects in the first place.
- The hierarchy claimed to represent the host, when in fact it was
based on the target OS.
- Each leaf in the hierarchy was responsible for implementing the flag
processing and caching, resulting in a *lot* of copy-paste code and
quite a few bugs.
- The caching was consistently done based on architecture alone, even
though *any* aspect of the targeted triple might change the behavior
of the configured toolchain.
- Flag processing was already being done in the Driver proper,
separating the flag handling even more than it already is.
Instead of this, we can simply have the dispatch logic in the Driver
which previously created a HostInfo object create the ToolChain objects.
Adding caching in the Driver layer is a tiny amount of code. Finally,
pulling the flag processing into the Driver puts it where it belongs and
consolidates it in one location.
The result is that two functions, and maybe 100 lines of new code
replace over 10 classes and 800 lines of code. Woot.
This also paves the way to introduce more detailed ToolChain objects for
various OSes without threading through a new HostInfo type as well, and
the accompanying boilerplate. That, of course, was the yak I started to
shave that began this entire refactoring escapade. Wheee!
llvm-svn: 148950
2012-01-25 19:01:57 +08:00
|
|
|
|
2012-01-31 10:21:20 +08:00
|
|
|
ToolChain *&TC = ToolChains[Target.str()];
|
Delete the driver's HostInfo class. This abstraction just never really
did anything. The two big pieces of functionality it tried to provide
were to cache the ToolChain objects for each target, and to figure out
the exact target based on the flag set coming in to an invocation.
However, it had a lot of flaws even with those goals:
- Neither of these have anything to do with the host, or its info.
- The HostInfo class was set up as a full-blown class *hierarchy* with
a separate implementation for each "host" OS. This required
dispatching just to create the objects in the first place.
- The hierarchy claimed to represent the host, when in fact it was
based on the target OS.
- Each leaf in the hierarchy was responsible for implementing the flag
processing and caching, resulting in a *lot* of copy-paste code and
quite a few bugs.
- The caching was consistently done based on architecture alone, even
though *any* aspect of the targeted triple might change the behavior
of the configured toolchain.
- Flag processing was already being done in the Driver proper,
separating the flag handling even more than it already is.
Instead of this, we can simply have the dispatch logic in the Driver
which previously created a HostInfo object create the ToolChain objects.
Adding caching in the Driver layer is a tiny amount of code. Finally,
pulling the flag processing into the Driver puts it where it belongs and
consolidates it in one location.
The result is that two functions, and maybe 100 lines of new code
replace over 10 classes and 800 lines of code. Woot.
This also paves the way to introduce more detailed ToolChain objects for
various OSes without threading through a new HostInfo type as well, and
the accompanying boilerplate. That, of course, was the yak I started to
shave that began this entire refactoring escapade. Wheee!
llvm-svn: 148950
2012-01-25 19:01:57 +08:00
|
|
|
if (!TC) {
|
|
|
|
switch (Target.getOS()) {
|
2016-05-12 00:19:05 +08:00
|
|
|
case llvm::Triple::Haiku:
|
|
|
|
TC = new toolchains::Haiku(*this, Target, Args);
|
|
|
|
break;
|
2015-03-26 19:13:44 +08:00
|
|
|
case llvm::Triple::CloudABI:
|
|
|
|
TC = new toolchains::CloudABI(*this, Target, Args);
|
|
|
|
break;
|
Delete the driver's HostInfo class. This abstraction just never really
did anything. The two big pieces of functionality it tried to provide
were to cache the ToolChain objects for each target, and to figure out
the exact target based on the flag set coming in to an invocation.
However, it had a lot of flaws even with those goals:
- Neither of these have anything to do with the host, or its info.
- The HostInfo class was set up as a full-blown class *hierarchy* with
a separate implementation for each "host" OS. This required
dispatching just to create the objects in the first place.
- The hierarchy claimed to represent the host, when in fact it was
based on the target OS.
- Each leaf in the hierarchy was responsible for implementing the flag
processing and caching, resulting in a *lot* of copy-paste code and
quite a few bugs.
- The caching was consistently done based on architecture alone, even
though *any* aspect of the targeted triple might change the behavior
of the configured toolchain.
- Flag processing was already being done in the Driver proper,
separating the flag handling even more than it already is.
Instead of this, we can simply have the dispatch logic in the Driver
which previously created a HostInfo object create the ToolChain objects.
Adding caching in the Driver layer is a tiny amount of code. Finally,
pulling the flag processing into the Driver puts it where it belongs and
consolidates it in one location.
The result is that two functions, and maybe 100 lines of new code
replace over 10 classes and 800 lines of code. Woot.
This also paves the way to introduce more detailed ToolChain objects for
various OSes without threading through a new HostInfo type as well, and
the accompanying boilerplate. That, of course, was the yak I started to
shave that began this entire refactoring escapade. Wheee!
llvm-svn: 148950
2012-01-25 19:01:57 +08:00
|
|
|
case llvm::Triple::Darwin:
|
|
|
|
case llvm::Triple::MacOSX:
|
|
|
|
case llvm::Triple::IOS:
|
2015-10-31 00:30:27 +08:00
|
|
|
case llvm::Triple::TvOS:
|
|
|
|
case llvm::Triple::WatchOS:
|
2013-11-25 07:28:23 +08:00
|
|
|
TC = new toolchains::DarwinClang(*this, Target, Args);
|
Delete the driver's HostInfo class. This abstraction just never really
did anything. The two big pieces of functionality it tried to provide
were to cache the ToolChain objects for each target, and to figure out
the exact target based on the flag set coming in to an invocation.
However, it had a lot of flaws even with those goals:
- Neither of these have anything to do with the host, or its info.
- The HostInfo class was set up as a full-blown class *hierarchy* with
a separate implementation for each "host" OS. This required
dispatching just to create the objects in the first place.
- The hierarchy claimed to represent the host, when in fact it was
based on the target OS.
- Each leaf in the hierarchy was responsible for implementing the flag
processing and caching, resulting in a *lot* of copy-paste code and
quite a few bugs.
- The caching was consistently done based on architecture alone, even
though *any* aspect of the targeted triple might change the behavior
of the configured toolchain.
- Flag processing was already being done in the Driver proper,
separating the flag handling even more than it already is.
Instead of this, we can simply have the dispatch logic in the Driver
which previously created a HostInfo object create the ToolChain objects.
Adding caching in the Driver layer is a tiny amount of code. Finally,
pulling the flag processing into the Driver puts it where it belongs and
consolidates it in one location.
The result is that two functions, and maybe 100 lines of new code
replace over 10 classes and 800 lines of code. Woot.
This also paves the way to introduce more detailed ToolChain objects for
various OSes without threading through a new HostInfo type as well, and
the accompanying boilerplate. That, of course, was the yak I started to
shave that began this entire refactoring escapade. Wheee!
llvm-svn: 148950
2012-01-25 19:01:57 +08:00
|
|
|
break;
|
|
|
|
case llvm::Triple::DragonFly:
|
2012-02-19 09:38:32 +08:00
|
|
|
TC = new toolchains::DragonFly(*this, Target, Args);
|
Delete the driver's HostInfo class. This abstraction just never really
did anything. The two big pieces of functionality it tried to provide
were to cache the ToolChain objects for each target, and to figure out
the exact target based on the flag set coming in to an invocation.
However, it had a lot of flaws even with those goals:
- Neither of these have anything to do with the host, or its info.
- The HostInfo class was set up as a full-blown class *hierarchy* with
a separate implementation for each "host" OS. This required
dispatching just to create the objects in the first place.
- The hierarchy claimed to represent the host, when in fact it was
based on the target OS.
- Each leaf in the hierarchy was responsible for implementing the flag
processing and caching, resulting in a *lot* of copy-paste code and
quite a few bugs.
- The caching was consistently done based on architecture alone, even
though *any* aspect of the targeted triple might change the behavior
of the configured toolchain.
- Flag processing was already being done in the Driver proper,
separating the flag handling even more than it already is.
Instead of this, we can simply have the dispatch logic in the Driver
which previously created a HostInfo object create the ToolChain objects.
Adding caching in the Driver layer is a tiny amount of code. Finally,
pulling the flag processing into the Driver puts it where it belongs and
consolidates it in one location.
The result is that two functions, and maybe 100 lines of new code
replace over 10 classes and 800 lines of code. Woot.
This also paves the way to introduce more detailed ToolChain objects for
various OSes without threading through a new HostInfo type as well, and
the accompanying boilerplate. That, of course, was the yak I started to
shave that began this entire refactoring escapade. Wheee!
llvm-svn: 148950
2012-01-25 19:01:57 +08:00
|
|
|
break;
|
|
|
|
case llvm::Triple::OpenBSD:
|
2012-02-19 09:38:32 +08:00
|
|
|
TC = new toolchains::OpenBSD(*this, Target, Args);
|
Delete the driver's HostInfo class. This abstraction just never really
did anything. The two big pieces of functionality it tried to provide
were to cache the ToolChain objects for each target, and to figure out
the exact target based on the flag set coming in to an invocation.
However, it had a lot of flaws even with those goals:
- Neither of these have anything to do with the host, or its info.
- The HostInfo class was set up as a full-blown class *hierarchy* with
a separate implementation for each "host" OS. This required
dispatching just to create the objects in the first place.
- The hierarchy claimed to represent the host, when in fact it was
based on the target OS.
- Each leaf in the hierarchy was responsible for implementing the flag
processing and caching, resulting in a *lot* of copy-paste code and
quite a few bugs.
- The caching was consistently done based on architecture alone, even
though *any* aspect of the targeted triple might change the behavior
of the configured toolchain.
- Flag processing was already being done in the Driver proper,
separating the flag handling even more than it already is.
Instead of this, we can simply have the dispatch logic in the Driver
which previously created a HostInfo object create the ToolChain objects.
Adding caching in the Driver layer is a tiny amount of code. Finally,
pulling the flag processing into the Driver puts it where it belongs and
consolidates it in one location.
The result is that two functions, and maybe 100 lines of new code
replace over 10 classes and 800 lines of code. Woot.
This also paves the way to introduce more detailed ToolChain objects for
various OSes without threading through a new HostInfo type as well, and
the accompanying boilerplate. That, of course, was the yak I started to
shave that began this entire refactoring escapade. Wheee!
llvm-svn: 148950
2012-01-25 19:01:57 +08:00
|
|
|
break;
|
2012-08-09 07:57:20 +08:00
|
|
|
case llvm::Triple::Bitrig:
|
|
|
|
TC = new toolchains::Bitrig(*this, Target, Args);
|
|
|
|
break;
|
Delete the driver's HostInfo class. This abstraction just never really
did anything. The two big pieces of functionality it tried to provide
were to cache the ToolChain objects for each target, and to figure out
the exact target based on the flag set coming in to an invocation.
However, it had a lot of flaws even with those goals:
- Neither of these have anything to do with the host, or its info.
- The HostInfo class was set up as a full-blown class *hierarchy* with
a separate implementation for each "host" OS. This required
dispatching just to create the objects in the first place.
- The hierarchy claimed to represent the host, when in fact it was
based on the target OS.
- Each leaf in the hierarchy was responsible for implementing the flag
processing and caching, resulting in a *lot* of copy-paste code and
quite a few bugs.
- The caching was consistently done based on architecture alone, even
though *any* aspect of the targeted triple might change the behavior
of the configured toolchain.
- Flag processing was already being done in the Driver proper,
separating the flag handling even more than it already is.
Instead of this, we can simply have the dispatch logic in the Driver
which previously created a HostInfo object create the ToolChain objects.
Adding caching in the Driver layer is a tiny amount of code. Finally,
pulling the flag processing into the Driver puts it where it belongs and
consolidates it in one location.
The result is that two functions, and maybe 100 lines of new code
replace over 10 classes and 800 lines of code. Woot.
This also paves the way to introduce more detailed ToolChain objects for
various OSes without threading through a new HostInfo type as well, and
the accompanying boilerplate. That, of course, was the yak I started to
shave that began this entire refactoring escapade. Wheee!
llvm-svn: 148950
2012-01-25 19:01:57 +08:00
|
|
|
case llvm::Triple::NetBSD:
|
2012-02-19 09:38:32 +08:00
|
|
|
TC = new toolchains::NetBSD(*this, Target, Args);
|
Delete the driver's HostInfo class. This abstraction just never really
did anything. The two big pieces of functionality it tried to provide
were to cache the ToolChain objects for each target, and to figure out
the exact target based on the flag set coming in to an invocation.
However, it had a lot of flaws even with those goals:
- Neither of these have anything to do with the host, or its info.
- The HostInfo class was set up as a full-blown class *hierarchy* with
a separate implementation for each "host" OS. This required
dispatching just to create the objects in the first place.
- The hierarchy claimed to represent the host, when in fact it was
based on the target OS.
- Each leaf in the hierarchy was responsible for implementing the flag
processing and caching, resulting in a *lot* of copy-paste code and
quite a few bugs.
- The caching was consistently done based on architecture alone, even
though *any* aspect of the targeted triple might change the behavior
of the configured toolchain.
- Flag processing was already being done in the Driver proper,
separating the flag handling even more than it already is.
Instead of this, we can simply have the dispatch logic in the Driver
which previously created a HostInfo object create the ToolChain objects.
Adding caching in the Driver layer is a tiny amount of code. Finally,
pulling the flag processing into the Driver puts it where it belongs and
consolidates it in one location.
The result is that two functions, and maybe 100 lines of new code
replace over 10 classes and 800 lines of code. Woot.
This also paves the way to introduce more detailed ToolChain objects for
various OSes without threading through a new HostInfo type as well, and
the accompanying boilerplate. That, of course, was the yak I started to
shave that began this entire refactoring escapade. Wheee!
llvm-svn: 148950
2012-01-25 19:01:57 +08:00
|
|
|
break;
|
|
|
|
case llvm::Triple::FreeBSD:
|
2012-02-19 09:38:32 +08:00
|
|
|
TC = new toolchains::FreeBSD(*this, Target, Args);
|
Delete the driver's HostInfo class. This abstraction just never really
did anything. The two big pieces of functionality it tried to provide
were to cache the ToolChain objects for each target, and to figure out
the exact target based on the flag set coming in to an invocation.
However, it had a lot of flaws even with those goals:
- Neither of these have anything to do with the host, or its info.
- The HostInfo class was set up as a full-blown class *hierarchy* with
a separate implementation for each "host" OS. This required
dispatching just to create the objects in the first place.
- The hierarchy claimed to represent the host, when in fact it was
based on the target OS.
- Each leaf in the hierarchy was responsible for implementing the flag
processing and caching, resulting in a *lot* of copy-paste code and
quite a few bugs.
- The caching was consistently done based on architecture alone, even
though *any* aspect of the targeted triple might change the behavior
of the configured toolchain.
- Flag processing was already being done in the Driver proper,
separating the flag handling even more than it already is.
Instead of this, we can simply have the dispatch logic in the Driver
which previously created a HostInfo object create the ToolChain objects.
Adding caching in the Driver layer is a tiny amount of code. Finally,
pulling the flag processing into the Driver puts it where it belongs and
consolidates it in one location.
The result is that two functions, and maybe 100 lines of new code
replace over 10 classes and 800 lines of code. Woot.
This also paves the way to introduce more detailed ToolChain objects for
various OSes without threading through a new HostInfo type as well, and
the accompanying boilerplate. That, of course, was the yak I started to
shave that began this entire refactoring escapade. Wheee!
llvm-svn: 148950
2012-01-25 19:01:57 +08:00
|
|
|
break;
|
|
|
|
case llvm::Triple::Minix:
|
2012-02-19 09:38:32 +08:00
|
|
|
TC = new toolchains::Minix(*this, Target, Args);
|
Delete the driver's HostInfo class. This abstraction just never really
did anything. The two big pieces of functionality it tried to provide
were to cache the ToolChain objects for each target, and to figure out
the exact target based on the flag set coming in to an invocation.
However, it had a lot of flaws even with those goals:
- Neither of these have anything to do with the host, or its info.
- The HostInfo class was set up as a full-blown class *hierarchy* with
a separate implementation for each "host" OS. This required
dispatching just to create the objects in the first place.
- The hierarchy claimed to represent the host, when in fact it was
based on the target OS.
- Each leaf in the hierarchy was responsible for implementing the flag
processing and caching, resulting in a *lot* of copy-paste code and
quite a few bugs.
- The caching was consistently done based on architecture alone, even
though *any* aspect of the targeted triple might change the behavior
of the configured toolchain.
- Flag processing was already being done in the Driver proper,
separating the flag handling even more than it already is.
Instead of this, we can simply have the dispatch logic in the Driver
which previously created a HostInfo object create the ToolChain objects.
Adding caching in the Driver layer is a tiny amount of code. Finally,
pulling the flag processing into the Driver puts it where it belongs and
consolidates it in one location.
The result is that two functions, and maybe 100 lines of new code
replace over 10 classes and 800 lines of code. Woot.
This also paves the way to introduce more detailed ToolChain objects for
various OSes without threading through a new HostInfo type as well, and
the accompanying boilerplate. That, of course, was the yak I started to
shave that began this entire refactoring escapade. Wheee!
llvm-svn: 148950
2012-01-25 19:01:57 +08:00
|
|
|
break;
|
|
|
|
case llvm::Triple::Linux:
|
2016-06-16 18:36:09 +08:00
|
|
|
case llvm::Triple::ELFIAMCU:
|
2012-01-26 05:03:58 +08:00
|
|
|
if (Target.getArch() == llvm::Triple::hexagon)
|
2015-07-28 00:53:08 +08:00
|
|
|
TC = new toolchains::HexagonToolChain(*this, Target, Args);
|
2015-11-12 23:26:54 +08:00
|
|
|
else if ((Target.getVendor() == llvm::Triple::MipsTechnologies) &&
|
|
|
|
!Target.hasEnvironment())
|
|
|
|
TC = new toolchains::MipsLLVMToolChain(*this, Target, Args);
|
2012-01-26 05:03:58 +08:00
|
|
|
else
|
2012-02-19 09:38:32 +08:00
|
|
|
TC = new toolchains::Linux(*this, Target, Args);
|
Delete the driver's HostInfo class. This abstraction just never really
did anything. The two big pieces of functionality it tried to provide
were to cache the ToolChain objects for each target, and to figure out
the exact target based on the flag set coming in to an invocation.
However, it had a lot of flaws even with those goals:
- Neither of these have anything to do with the host, or its info.
- The HostInfo class was set up as a full-blown class *hierarchy* with
a separate implementation for each "host" OS. This required
dispatching just to create the objects in the first place.
- The hierarchy claimed to represent the host, when in fact it was
based on the target OS.
- Each leaf in the hierarchy was responsible for implementing the flag
processing and caching, resulting in a *lot* of copy-paste code and
quite a few bugs.
- The caching was consistently done based on architecture alone, even
though *any* aspect of the targeted triple might change the behavior
of the configured toolchain.
- Flag processing was already being done in the Driver proper,
separating the flag handling even more than it already is.
Instead of this, we can simply have the dispatch logic in the Driver
which previously created a HostInfo object create the ToolChain objects.
Adding caching in the Driver layer is a tiny amount of code. Finally,
pulling the flag processing into the Driver puts it where it belongs and
consolidates it in one location.
The result is that two functions, and maybe 100 lines of new code
replace over 10 classes and 800 lines of code. Woot.
This also paves the way to introduce more detailed ToolChain objects for
various OSes without threading through a new HostInfo type as well, and
the accompanying boilerplate. That, of course, was the yak I started to
shave that began this entire refactoring escapade. Wheee!
llvm-svn: 148950
2012-01-25 19:01:57 +08:00
|
|
|
break;
|
2015-03-31 04:31:33 +08:00
|
|
|
case llvm::Triple::NaCl:
|
2015-07-28 00:53:08 +08:00
|
|
|
TC = new toolchains::NaClToolChain(*this, Target, Args);
|
2015-03-31 04:31:33 +08:00
|
|
|
break;
|
2016-10-06 14:08:09 +08:00
|
|
|
case llvm::Triple::Fuchsia:
|
|
|
|
TC = new toolchains::Fuchsia(*this, Target, Args);
|
|
|
|
break;
|
2012-02-15 21:39:01 +08:00
|
|
|
case llvm::Triple::Solaris:
|
2012-02-19 09:38:32 +08:00
|
|
|
TC = new toolchains::Solaris(*this, Target, Args);
|
2012-02-15 21:39:01 +08:00
|
|
|
break;
|
2015-07-18 09:49:05 +08:00
|
|
|
case llvm::Triple::AMDHSA:
|
|
|
|
TC = new toolchains::AMDGPUToolChain(*this, Target, Args);
|
|
|
|
break;
|
Delete the driver's HostInfo class. This abstraction just never really
did anything. The two big pieces of functionality it tried to provide
were to cache the ToolChain objects for each target, and to figure out
the exact target based on the flag set coming in to an invocation.
However, it had a lot of flaws even with those goals:
- Neither of these have anything to do with the host, or its info.
- The HostInfo class was set up as a full-blown class *hierarchy* with
a separate implementation for each "host" OS. This required
dispatching just to create the objects in the first place.
- The hierarchy claimed to represent the host, when in fact it was
based on the target OS.
- Each leaf in the hierarchy was responsible for implementing the flag
processing and caching, resulting in a *lot* of copy-paste code and
quite a few bugs.
- The caching was consistently done based on architecture alone, even
though *any* aspect of the targeted triple might change the behavior
of the configured toolchain.
- Flag processing was already being done in the Driver proper,
separating the flag handling even more than it already is.
Instead of this, we can simply have the dispatch logic in the Driver
which previously created a HostInfo object create the ToolChain objects.
Adding caching in the Driver layer is a tiny amount of code. Finally,
pulling the flag processing into the Driver puts it where it belongs and
consolidates it in one location.
The result is that two functions, and maybe 100 lines of new code
replace over 10 classes and 800 lines of code. Woot.
This also paves the way to introduce more detailed ToolChain objects for
various OSes without threading through a new HostInfo type as well, and
the accompanying boilerplate. That, of course, was the yak I started to
shave that began this entire refactoring escapade. Wheee!
llvm-svn: 148950
2012-01-25 19:01:57 +08:00
|
|
|
case llvm::Triple::Win32:
|
2014-03-28 06:50:18 +08:00
|
|
|
switch (Target.getEnvironment()) {
|
|
|
|
default:
|
|
|
|
if (Target.isOSBinFormatELF())
|
|
|
|
TC = new toolchains::Generic_ELF(*this, Target, Args);
|
|
|
|
else if (Target.isOSBinFormatMachO())
|
|
|
|
TC = new toolchains::MachO(*this, Target, Args);
|
|
|
|
else
|
|
|
|
TC = new toolchains::Generic_GCC(*this, Target, Args);
|
|
|
|
break;
|
|
|
|
case llvm::Triple::GNU:
|
2015-07-02 12:45:27 +08:00
|
|
|
TC = new toolchains::MinGW(*this, Target, Args);
|
2014-03-28 06:50:18 +08:00
|
|
|
break;
|
2014-10-24 11:13:37 +08:00
|
|
|
case llvm::Triple::Itanium:
|
|
|
|
TC = new toolchains::CrossWindowsToolChain(*this, Target, Args);
|
|
|
|
break;
|
2014-03-28 06:50:18 +08:00
|
|
|
case llvm::Triple::MSVC:
|
|
|
|
case llvm::Triple::UnknownEnvironment:
|
2014-10-22 10:37:29 +08:00
|
|
|
TC = new toolchains::MSVCToolChain(*this, Target, Args);
|
2014-03-28 06:50:18 +08:00
|
|
|
break;
|
|
|
|
}
|
Delete the driver's HostInfo class. This abstraction just never really
did anything. The two big pieces of functionality it tried to provide
were to cache the ToolChain objects for each target, and to figure out
the exact target based on the flag set coming in to an invocation.
However, it had a lot of flaws even with those goals:
- Neither of these have anything to do with the host, or its info.
- The HostInfo class was set up as a full-blown class *hierarchy* with
a separate implementation for each "host" OS. This required
dispatching just to create the objects in the first place.
- The hierarchy claimed to represent the host, when in fact it was
based on the target OS.
- Each leaf in the hierarchy was responsible for implementing the flag
processing and caching, resulting in a *lot* of copy-paste code and
quite a few bugs.
- The caching was consistently done based on architecture alone, even
though *any* aspect of the targeted triple might change the behavior
of the configured toolchain.
- Flag processing was already being done in the Driver proper,
separating the flag handling even more than it already is.
Instead of this, we can simply have the dispatch logic in the Driver
which previously created a HostInfo object create the ToolChain objects.
Adding caching in the Driver layer is a tiny amount of code. Finally,
pulling the flag processing into the Driver puts it where it belongs and
consolidates it in one location.
The result is that two functions, and maybe 100 lines of new code
replace over 10 classes and 800 lines of code. Woot.
This also paves the way to introduce more detailed ToolChain objects for
various OSes without threading through a new HostInfo type as well, and
the accompanying boilerplate. That, of course, was the yak I started to
shave that began this entire refactoring escapade. Wheee!
llvm-svn: 148950
2012-01-25 19:01:57 +08:00
|
|
|
break;
|
2015-07-14 07:27:56 +08:00
|
|
|
case llvm::Triple::CUDA:
|
|
|
|
TC = new toolchains::CudaToolChain(*this, Target, Args);
|
|
|
|
break;
|
2015-10-14 20:25:43 +08:00
|
|
|
case llvm::Triple::PS4:
|
|
|
|
TC = new toolchains::PS4CPU(*this, Target, Args);
|
|
|
|
break;
|
2016-10-15 04:44:33 +08:00
|
|
|
case llvm::Triple::Contiki:
|
|
|
|
TC = new toolchains::Contiki(*this, Target, Args);
|
|
|
|
break;
|
Delete the driver's HostInfo class. This abstraction just never really
did anything. The two big pieces of functionality it tried to provide
were to cache the ToolChain objects for each target, and to figure out
the exact target based on the flag set coming in to an invocation.
However, it had a lot of flaws even with those goals:
- Neither of these have anything to do with the host, or its info.
- The HostInfo class was set up as a full-blown class *hierarchy* with
a separate implementation for each "host" OS. This required
dispatching just to create the objects in the first place.
- The hierarchy claimed to represent the host, when in fact it was
based on the target OS.
- Each leaf in the hierarchy was responsible for implementing the flag
processing and caching, resulting in a *lot* of copy-paste code and
quite a few bugs.
- The caching was consistently done based on architecture alone, even
though *any* aspect of the targeted triple might change the behavior
of the configured toolchain.
- Flag processing was already being done in the Driver proper,
separating the flag handling even more than it already is.
Instead of this, we can simply have the dispatch logic in the Driver
which previously created a HostInfo object create the ToolChain objects.
Adding caching in the Driver layer is a tiny amount of code. Finally,
pulling the flag processing into the Driver puts it where it belongs and
consolidates it in one location.
The result is that two functions, and maybe 100 lines of new code
replace over 10 classes and 800 lines of code. Woot.
This also paves the way to introduce more detailed ToolChain objects for
various OSes without threading through a new HostInfo type as well, and
the accompanying boilerplate. That, of course, was the yak I started to
shave that began this entire refactoring escapade. Wheee!
llvm-svn: 148950
2012-01-25 19:01:57 +08:00
|
|
|
default:
|
2015-05-27 02:01:33 +08:00
|
|
|
// Of these targets, Hexagon is the only one that might have
|
|
|
|
// an OS of Linux, in which case it got handled above already.
|
2015-08-13 02:36:12 +08:00
|
|
|
switch (Target.getArch()) {
|
|
|
|
case llvm::Triple::tce:
|
2013-03-19 02:10:27 +08:00
|
|
|
TC = new toolchains::TCEToolChain(*this, Target, Args);
|
2015-08-13 02:36:12 +08:00
|
|
|
break;
|
|
|
|
case llvm::Triple::hexagon:
|
2015-07-28 00:53:08 +08:00
|
|
|
TC = new toolchains::HexagonToolChain(*this, Target, Args);
|
2015-08-13 02:36:12 +08:00
|
|
|
break;
|
2016-03-29 05:02:54 +08:00
|
|
|
case llvm::Triple::lanai:
|
|
|
|
TC = new toolchains::LanaiToolChain(*this, Target, Args);
|
|
|
|
break;
|
2015-08-13 02:36:12 +08:00
|
|
|
case llvm::Triple::xcore:
|
2015-07-28 00:53:08 +08:00
|
|
|
TC = new toolchains::XCoreToolChain(*this, Target, Args);
|
2015-08-13 02:36:12 +08:00
|
|
|
break;
|
2015-09-04 06:51:53 +08:00
|
|
|
case llvm::Triple::wasm32:
|
|
|
|
case llvm::Triple::wasm64:
|
|
|
|
TC = new toolchains::WebAssembly(*this, Target, Args);
|
|
|
|
break;
|
2015-08-13 02:36:12 +08:00
|
|
|
default:
|
2015-09-18 03:56:40 +08:00
|
|
|
if (Target.getVendor() == llvm::Triple::Myriad)
|
|
|
|
TC = new toolchains::MyriadToolChain(*this, Target, Args);
|
|
|
|
else if (Target.isOSBinFormatELF())
|
2015-08-13 02:36:12 +08:00
|
|
|
TC = new toolchains::Generic_ELF(*this, Target, Args);
|
|
|
|
else if (Target.isOSBinFormatMachO())
|
|
|
|
TC = new toolchains::MachO(*this, Target, Args);
|
|
|
|
else
|
|
|
|
TC = new toolchains::Generic_GCC(*this, Target, Args);
|
|
|
|
}
|
Delete the driver's HostInfo class. This abstraction just never really
did anything. The two big pieces of functionality it tried to provide
were to cache the ToolChain objects for each target, and to figure out
the exact target based on the flag set coming in to an invocation.
However, it had a lot of flaws even with those goals:
- Neither of these has anything to do with the host, or its info.
- The HostInfo class was set up as a full-blown class *hierarchy* with
a separate implementation for each "host" OS. This required
dispatching just to create the objects in the first place.
- The hierarchy claimed to represent the host, when in fact it was
based on the target OS.
- Each leaf in the hierarchy was responsible for implementing the flag
processing and caching, resulting in a *lot* of copy-paste code and
quite a few bugs.
- The caching was consistently done based on architecture alone, even
though *any* aspect of the targeted triple might change the behavior
of the configured toolchain.
- Flag processing was already being done in the Driver proper,
separating the flag handling even more than it already is.
Instead of this, we can simply have the dispatch logic in the Driver
which previously created a HostInfo object create the ToolChain objects.
Adding caching in the Driver layer is a tiny amount of code. Finally,
pulling the flag processing into the Driver puts it where it belongs and
consolidates it in one location.
The result is that two functions, and maybe 100 lines of new code
replace over 10 classes and 800 lines of code. Woot.
This also paves the way to introduce more detailed ToolChain objects for
various OSes without threading through a new HostInfo type as well, and
the accompanying boiler plate. That, of course, was the yak I started to
shave that began this entire refactoring escapade. Wheee!
llvm-svn: 148950
2012-01-25 19:01:57 +08:00
|
|
|
}
|
2009-05-22 10:53:45 +08:00
|
|
|
}
|
Delete the driver's HostInfo class. This abstraction just never really
did anything. The two big pieces of functionality it tried to provide
were to cache the ToolChain objects for each target, and to figure out
the exact target based on the flag set coming in to an invocation.
However, it had a lot of flaws even with those goals:
- Neither of these has anything to do with the host, or its info.
- The HostInfo class was set up as a full-blown class *hierarchy* with
a separate implementation for each "host" OS. This required
dispatching just to create the objects in the first place.
- The hierarchy claimed to represent the host, when in fact it was
based on the target OS.
- Each leaf in the hierarchy was responsible for implementing the flag
processing and caching, resulting in a *lot* of copy-paste code and
quite a few bugs.
- The caching was consistently done based on architecture alone, even
though *any* aspect of the targeted triple might change the behavior
of the configured toolchain.
- Flag processing was already being done in the Driver proper,
separating the flag handling even more than it already is.
Instead of this, we can simply have the dispatch logic in the Driver
which previously created a HostInfo object create the ToolChain objects.
Adding caching in the Driver layer is a tiny amount of code. Finally,
pulling the flag processing into the Driver puts it where it belongs and
consolidates it in one location.
The result is that two functions, and maybe 100 lines of new code
replace over 10 classes and 800 lines of code. Woot.
This also paves the way to introduce more detailed ToolChain objects for
various OSes without threading through a new HostInfo type as well, and
the accompanying boiler plate. That, of course, was the yak I started to
shave that began this entire refactoring escapade. Wheee!
llvm-svn: 148950
2012-01-25 19:01:57 +08:00
|
|
|
return *TC;
|
2009-03-11 07:41:59 +08:00
|
|
|
}
|
2009-03-25 02:57:02 +08:00
|
|
|
|
2013-03-18 23:33:26 +08:00
|
|
|
bool Driver::ShouldUseClangCompiler(const JobAction &JA) const {
|
2015-06-12 23:45:21 +08:00
|
|
|
// Say "no" if there is not exactly one input of a type clang understands.
|
2016-02-24 03:30:43 +08:00
|
|
|
if (JA.size() != 1 ||
|
|
|
|
!types::isAcceptedByClang((*JA.input_begin())->getType()))
|
2012-11-15 13:36:36 +08:00
|
|
|
return false;
|
|
|
|
|
2015-06-12 23:45:21 +08:00
|
|
|
// And say "no" if this is not a kind of action clang understands.
|
2012-11-15 13:36:36 +08:00
|
|
|
if (!isa<PreprocessJobAction>(JA) && !isa<PrecompileJobAction>(JA) &&
|
Reapply "Change -save-temps to emit unoptimized bitcode files."
This reapplies r224503 along with a fix for compiling Fortran by having the
clang driver invoke gcc (see r224546, where it was reverted). I have added
a testcase for that as well.
Original commit message:
It is often convenient to use -save-temps to collect the intermediate
results of a compilation, e.g., when triaging a bug report. Besides the
temporary files for preprocessed source and assembly code, this adds the
unoptimized bitcode files as well.
This adds a new BackendJobAction, which is mostly mechanical, to run after
the CompileJobAction. When not using -save-temps, the BackendJobAction is
combined into one job with the CompileJobAction, similar to the way the
integrated assembler is handled. I've implemented this entirely as a
driver change, so under the hood, it is just using -disable-llvm-optzns
to get the unoptimized bitcode.
Based in part on a patch by Steven Wu.
rdar://problem/18909437
llvm-svn: 224688
2014-12-21 15:00:00 +08:00
|
|
|
!isa<CompileJobAction>(JA) && !isa<BackendJobAction>(JA))
|
2012-11-15 13:36:36 +08:00
|
|
|
return false;
|
|
|
|
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2009-09-09 07:36:43 +08:00
|
|
|
/// GetReleaseVersion - Parse (([0-9]+)(.([0-9]+)(.([0-9]+)?))?)? and return the
|
|
|
|
/// grouped values as integers. Numbers which are not provided are set to 0.
|
2009-03-26 23:58:36 +08:00
|
|
|
///
|
2009-09-09 07:36:43 +08:00
|
|
|
/// \return True if the entire string was parsed (9.2), or all groups were
|
|
|
|
/// parsed (10.3.5extrastuff).
|
2016-10-06 13:11:48 +08:00
|
|
|
bool Driver::GetReleaseVersion(StringRef Str, unsigned &Major, unsigned &Minor,
|
|
|
|
unsigned &Micro, bool &HadExtra) {
|
2009-03-26 23:58:36 +08:00
|
|
|
HadExtra = false;
|
|
|
|
|
|
|
|
Major = Minor = Micro = 0;
|
2016-10-06 13:11:48 +08:00
|
|
|
if (Str.empty())
|
2015-04-07 09:03:35 +08:00
|
|
|
return false;
|
2009-03-26 23:58:36 +08:00
|
|
|
|
2016-10-06 13:11:48 +08:00
|
|
|
if (Str.consumeInteger(10, Major))
|
|
|
|
return false;
|
|
|
|
if (Str.empty())
|
2009-03-26 23:58:36 +08:00
|
|
|
return true;
|
2016-10-06 13:11:48 +08:00
|
|
|
if (Str[0] != '.')
|
2009-03-26 23:58:36 +08:00
|
|
|
return false;
|
2009-09-09 07:36:43 +08:00
|
|
|
|
2016-10-06 13:11:48 +08:00
|
|
|
Str = Str.drop_front(1);
|
|
|
|
|
|
|
|
if (Str.consumeInteger(10, Minor))
|
|
|
|
return false;
|
|
|
|
if (Str.empty())
|
2009-03-26 23:58:36 +08:00
|
|
|
return true;
|
2016-10-06 13:11:48 +08:00
|
|
|
if (Str[0] != '.')
|
2009-03-26 23:58:36 +08:00
|
|
|
return false;
|
2016-10-06 13:11:48 +08:00
|
|
|
Str = Str.drop_front(1);
|
2009-03-26 23:58:36 +08:00
|
|
|
|
2016-10-06 13:11:48 +08:00
|
|
|
if (Str.consumeInteger(10, Micro))
|
2009-03-26 23:58:36 +08:00
|
|
|
return false;
|
2016-10-06 13:11:48 +08:00
|
|
|
if (!Str.empty())
|
|
|
|
HadExtra = true;
|
2009-03-26 23:58:36 +08:00
|
|
|
return true;
|
|
|
|
}
|
2013-07-27 08:23:45 +08:00
|
|
|
|
2016-03-31 10:45:46 +08:00
|
|
|
/// Parse digits from a string \p Str and fulfill \p Digits with
|
|
|
|
/// the parsed numbers. This method assumes that the max number of
|
|
|
|
/// digits to look for is equal to Digits.size().
|
|
|
|
///
|
|
|
|
/// \return True if the entire string was parsed and there are
|
|
|
|
/// no extra characters remaining at the end.
|
2016-10-06 13:11:48 +08:00
|
|
|
bool Driver::GetReleaseVersion(StringRef Str,
|
2016-03-31 10:45:46 +08:00
|
|
|
MutableArrayRef<unsigned> Digits) {
|
2016-10-06 13:11:48 +08:00
|
|
|
if (Str.empty())
|
2016-03-31 10:45:46 +08:00
|
|
|
return false;
|
|
|
|
|
|
|
|
unsigned CurDigit = 0;
|
|
|
|
while (CurDigit < Digits.size()) {
|
2016-10-06 13:11:48 +08:00
|
|
|
unsigned Digit;
|
|
|
|
if (Str.consumeInteger(10, Digit))
|
|
|
|
return false;
|
2016-03-31 10:45:46 +08:00
|
|
|
Digits[CurDigit] = Digit;
|
2016-10-06 13:11:48 +08:00
|
|
|
if (Str.empty())
|
2016-03-31 10:45:46 +08:00
|
|
|
return true;
|
2016-10-06 13:11:48 +08:00
|
|
|
if (Str[0] != '.')
|
2016-03-31 10:45:46 +08:00
|
|
|
return false;
|
2016-10-06 13:11:48 +08:00
|
|
|
Str = Str.drop_front(1);
|
2016-03-31 10:45:46 +08:00
|
|
|
CurDigit++;
|
|
|
|
}
|
|
|
|
|
|
|
|
// More digits than requested, bail out...
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2013-07-27 08:23:45 +08:00
|
|
|
std::pair<unsigned, unsigned> Driver::getIncludeExcludeOptionFlagMasks() const {
|
|
|
|
unsigned IncludedFlagsBitmask = 0;
|
2013-09-25 23:54:41 +08:00
|
|
|
unsigned ExcludedFlagsBitmask = options::NoDriverOption;
|
2013-07-27 08:23:45 +08:00
|
|
|
|
|
|
|
if (Mode == CLMode) {
|
2013-08-01 04:51:53 +08:00
|
|
|
// Include CL and Core options.
|
|
|
|
IncludedFlagsBitmask |= options::CLOption;
|
|
|
|
IncludedFlagsBitmask |= options::CoreOption;
|
2013-07-27 08:23:45 +08:00
|
|
|
} else {
|
|
|
|
ExcludedFlagsBitmask |= options::CLOption;
|
|
|
|
}
|
|
|
|
|
|
|
|
return std::make_pair(IncludedFlagsBitmask, ExcludedFlagsBitmask);
|
|
|
|
}
|
2014-03-26 02:02:07 +08:00
|
|
|
|
2015-06-04 22:40:44 +08:00
|
|
|
bool clang::driver::isOptimizationLevelFast(const ArgList &Args) {
|
2014-03-26 02:02:07 +08:00
|
|
|
return Args.hasFlag(options::OPT_Ofast, options::OPT_O_Group, false);
|
|
|
|
}
|