llvm-project/clang/lib/Driver/Driver.cpp

3930 lines
147 KiB
C++
Raw Normal View History

//===--- Driver.cpp - Clang GCC Compatible Driver -------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#include "clang/Driver/Driver.h"
#include "InputInfo.h"
[Driver] Consolidate tools and toolchains by target platform. (NFC) Summary: (This is a move-only refactoring patch. There are no functionality changes.) This patch splits apart the Clang driver's tool and toolchain implementation files. Each target platform toolchain is moved to its own file, along with the closest-related tools. Each target platform toolchain has separate headers and implementation files, so the hierarchy of classes is unchanged. There are some remaining shared free functions, mostly from Tools.cpp. Several of these move to their own architecture-specific files, similar to r296056. Some of them are only used by a single target platform; since the tools and toolchains are now together, some helpers now live in a platform-specific file. The balance are helpers related to manipulating argument lists, so they are now in a new file pair, CommonArgs.h and .cpp. I've tried to cluster the code logically, which is fairly straightforward for most of the target platforms and shared architectures. I think I've made reasonable choices for these, as well as the various shared helpers; but of course, I'm happy to hear feedback in the review. There are some particular things I don't like about this patch, but haven't been able to find a better overall solution. The first is the proliferation of files: there are several files that are tiny because the toolchain is not very different from its base (usually the Gnu tools/toolchain). I think this is mostly a reflection of the true complexity, though, so it may not be "fixable" in any reasonable sense. The second thing I don't like are the includes like "../Something.h". I've avoided this largely by clustering into the current file structure. However, a few of these includes remain, and in those cases it doesn't make sense to me to sink an existing file any deeper. Reviewers: rsmith, mehdi_amini, compnerd, rnk, javed.absar Subscribers: emaste, jfb, danalbert, srhines, dschuff, jyknight, nemanjai, nhaehnle, mgorny, cfe-commits Differential Revision: https://reviews.llvm.org/D30372 llvm-svn: 297250
2017-03-08 09:02:16 +08:00
#include "ToolChains/AMDGPU.h"
#include "ToolChains/AVR.h"
#include "ToolChains/Bitrig.h"
#include "ToolChains/Clang.h"
#include "ToolChains/CloudABI.h"
#include "ToolChains/Contiki.h"
#include "ToolChains/CrossWindows.h"
#include "ToolChains/Cuda.h"
#include "ToolChains/Darwin.h"
#include "ToolChains/DragonFly.h"
#include "ToolChains/FreeBSD.h"
#include "ToolChains/Fuchsia.h"
#include "ToolChains/Gnu.h"
#include "ToolChains/Haiku.h"
#include "ToolChains/Hexagon.h"
#include "ToolChains/Lanai.h"
#include "ToolChains/Linux.h"
#include "ToolChains/MinGW.h"
#include "ToolChains/Minix.h"
#include "ToolChains/MipsLinux.h"
[Driver] Consolidate tools and toolchains by target platform. (NFC) Summary: (This is a move-only refactoring patch. There are no functionality changes.) This patch splits apart the Clang driver's tool and toolchain implementation files. Each target platform toolchain is moved to its own file, along with the closest-related tools. Each target platform toolchain has separate headers and implementation files, so the hierarchy of classes is unchanged. There are some remaining shared free functions, mostly from Tools.cpp. Several of these move to their own architecture-specific files, similar to r296056. Some of them are only used by a single target platform; since the tools and toolchains are now together, some helpers now live in a platform-specific file. The balance are helpers related to manipulating argument lists, so they are now in a new file pair, CommonArgs.h and .cpp. I've tried to cluster the code logically, which is fairly straightforward for most of the target platforms and shared architectures. I think I've made reasonable choices for these, as well as the various shared helpers; but of course, I'm happy to hear feedback in the review. There are some particular things I don't like about this patch, but haven't been able to find a better overall solution. The first is the proliferation of files: there are several files that are tiny because the toolchain is not very different from its base (usually the Gnu tools/toolchain). I think this is mostly a reflection of the true complexity, though, so it may not be "fixable" in any reasonable sense. The second thing I don't like are the includes like "../Something.h". I've avoided this largely by clustering into the current file structure. However, a few of these includes remain, and in those cases it doesn't make sense to me to sink an existing file any deeper. Reviewers: rsmith, mehdi_amini, compnerd, rnk, javed.absar Subscribers: emaste, jfb, danalbert, srhines, dschuff, jyknight, nemanjai, nhaehnle, mgorny, cfe-commits Differential Revision: https://reviews.llvm.org/D30372 llvm-svn: 297250
2017-03-08 09:02:16 +08:00
#include "ToolChains/MSVC.h"
#include "ToolChains/Myriad.h"
#include "ToolChains/NaCl.h"
#include "ToolChains/NetBSD.h"
#include "ToolChains/OpenBSD.h"
#include "ToolChains/PS4CPU.h"
#include "ToolChains/Solaris.h"
#include "ToolChains/TCE.h"
#include "ToolChains/WebAssembly.h"
#include "ToolChains/XCore.h"
#include "clang/Basic/Version.h"
#include "clang/Basic/VirtualFileSystem.h"
#include "clang/Config/config.h"
#include "clang/Driver/Action.h"
#include "clang/Driver/Compilation.h"
#include "clang/Driver/DriverDiagnostic.h"
#include "clang/Driver/Job.h"
#include "clang/Driver/Options.h"
#include "clang/Driver/SanitizerArgs.h"
#include "clang/Driver/Tool.h"
#include "clang/Driver/ToolChain.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/Option/Arg.h"
#include "llvm/Option/ArgList.h"
#include "llvm/Option/OptSpecifier.h"
#include "llvm/Option/OptTable.h"
#include "llvm/Option/Option.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/PrettyStackTrace.h"
#include "llvm/Support/Process.h"
#include "llvm/Support/Program.h"
#include "llvm/Support/raw_ostream.h"
#include <map>
#include <memory>
#include <utility>
#if LLVM_ON_UNIX
#include <unistd.h> // getpid
#endif
using namespace clang::driver;
using namespace clang;
using namespace llvm::opt;
Driver::Driver(StringRef ClangExecutable, StringRef DefaultTargetTriple,
DiagnosticsEngine &Diags,
IntrusiveRefCntPtr<vfs::FileSystem> VFS)
: Opts(createDriverOptTable()), Diags(Diags), VFS(std::move(VFS)),
Mode(GCCMode), SaveTemps(SaveTempsNone), BitcodeEmbed(EmbedNone),
LTOMode(LTOK_None), ClangExecutable(ClangExecutable),
SysRoot(DEFAULT_SYSROOT), UseStdLib(true),
DriverTitle("clang LLVM compiler"), CCPrintOptionsFilename(nullptr),
CCPrintHeadersFilename(nullptr), CCLogDiagnosticsFilename(nullptr),
CCCPrintBindings(false), CCPrintHeaders(false), CCLogDiagnostics(false),
CCGenDiagnostics(false), DefaultTargetTriple(DefaultTargetTriple),
CCCGenericGCCName(""), CheckInputsExist(true), CCCUsePCH(true),
SuppressMissingInputWarning(false) {
// Provide a sane fallback if no VFS is specified.
if (!this->VFS)
this->VFS = vfs::getRealFileSystem();
Name = llvm::sys::path::filename(ClangExecutable);
Dir = llvm::sys::path::parent_path(ClangExecutable);
InstalledDir = Dir; // Provide a sensible default installed dir.
// Compute the path to the resource directory.
StringRef ClangResourceDir(CLANG_RESOURCE_DIR);
SmallString<128> P(Dir);
if (ClangResourceDir != "") {
llvm::sys::path::append(P, ClangResourceDir);
} else {
StringRef ClangLibdirSuffix(CLANG_LIBDIR_SUFFIX);
P = llvm::sys::path::parent_path(Dir);
llvm::sys::path::append(P, Twine("lib") + ClangLibdirSuffix, "clang",
CLANG_VERSION_STRING);
}
ResourceDir = P.str();
}
void Driver::ParseDriverMode(StringRef ProgramName,
ArrayRef<const char *> Args) {
auto Default = ToolChain::getTargetAndModeFromProgramName(ProgramName);
StringRef DefaultMode(Default.second);
setDriverModeFromOption(DefaultMode);
for (const char *ArgPtr : Args) {
// Ingore nullptrs, they are response file's EOL markers
if (ArgPtr == nullptr)
continue;
const StringRef Arg = ArgPtr;
setDriverModeFromOption(Arg);
}
}
void Driver::setDriverModeFromOption(StringRef Opt) {
const std::string OptName =
getOpts().getOption(options::OPT_driver_mode).getPrefixedName();
if (!Opt.startswith(OptName))
return;
StringRef Value = Opt.drop_front(OptName.size());
const unsigned M = llvm::StringSwitch<unsigned>(Value)
.Case("gcc", GCCMode)
.Case("g++", GXXMode)
.Case("cpp", CPPMode)
.Case("cl", CLMode)
.Default(~0U);
if (M != ~0U)
Mode = static_cast<DriverMode>(M);
else
Diag(diag::err_drv_unsupported_option_argument) << OptName << Value;
}
InputArgList Driver::ParseArgStrings(ArrayRef<const char *> ArgStrings) {
llvm::PrettyStackTraceString CrashInfo("Command line argument parsing");
unsigned IncludedFlagsBitmask;
unsigned ExcludedFlagsBitmask;
std::tie(IncludedFlagsBitmask, ExcludedFlagsBitmask) =
getIncludeExcludeOptionFlagMasks();
unsigned MissingArgIndex, MissingArgCount;
InputArgList Args =
getOpts().ParseArgs(ArgStrings, MissingArgIndex, MissingArgCount,
IncludedFlagsBitmask, ExcludedFlagsBitmask);
// Check for missing argument error.
if (MissingArgCount)
Diag(clang::diag::err_drv_missing_argument)
<< Args.getArgString(MissingArgIndex) << MissingArgCount;
// Check for unsupported options.
for (const Arg *A : Args) {
if (A->getOption().hasFlag(options::Unsupported)) {
Diag(clang::diag::err_drv_unsupported_opt) << A->getAsString(Args);
continue;
}
// Warn about -mcpu= without an argument.
if (A->getOption().matches(options::OPT_mcpu_EQ) && A->containsValue("")) {
Diag(clang::diag::warn_drv_empty_joined_argument) << A->getAsString(Args);
}
}
for (const Arg *A : Args.filtered(options::OPT_UNKNOWN))
Diags.Report(IsCLMode() ? diag::warn_drv_unknown_argument_clang_cl :
diag::err_drv_unknown_argument)
<< A->getAsString(Args);
return Args;
}
// Determine which compilation mode we are in. We look for options which
// affect the phase, starting with the earliest phases, and record which
// option we used to determine the final phase.
phases::ID Driver::getFinalPhase(const DerivedArgList &DAL,
Arg **FinalPhaseArg) const {
Arg *PhaseArg = nullptr;
phases::ID FinalPhase;
2011-08-18 06:59:59 +08:00
// -{E,EP,P,M,MM} only run the preprocessor.
if (CCCIsCPP() || (PhaseArg = DAL.getLastArg(options::OPT_E)) ||
(PhaseArg = DAL.getLastArg(options::OPT__SLASH_EP)) ||
(PhaseArg = DAL.getLastArg(options::OPT_M, options::OPT_MM)) ||
(PhaseArg = DAL.getLastArg(options::OPT__SLASH_P))) {
FinalPhase = phases::Preprocess;
2011-08-18 06:59:59 +08:00
Unrevert r280035 now that the clang-cl bug it exposed has been fixed by r280133. Original commit message: C++ Modules TS: driver support for building modules. This works as follows: we add --precompile to the existing gamut of options for specifying how far to go when compiling an input (-E, -c, -S, etc.). This flag specifies that an input is taken to the precompilation step and no further, and this can be specified when building a .pcm from a module interface or when building a .pch from a header file. The .cppm extension (and some related extensions) are implicitly recognized as C++ module interface files. If --precompile is /not/ specified, the file is compiled (via a .pcm) to a .o file containing the code for the module (and then potentially also assembled and linked, if -S, -c, etc. are not specified). We do not yet suppress the emission of object code for other users of the module interface, so for now this will only work if everything in the .cppm file has vague linkage. As with the existing support for module-map modules, prebuilt modules can be provided as compiler inputs either via the -fmodule-file= command-line argument or via files named ModuleName.pcm in one of the directories specified via -fprebuilt-module-path=. This also exposes the -fmodules-ts cc1 flag in the driver. This is still experimental, and in particular, the concrete syntax is subject to change as the Modules TS evolves in the C++ committee. Unlike -fmodules, this flag does not enable support for implicitly loading module maps nor building modules via the module cache, but those features can be turned on separately and used in conjunction with the Modules TS support. llvm-svn: 280134
2016-08-31 03:06:26 +08:00
// --precompile only runs up to precompilation.
} else if ((PhaseArg = DAL.getLastArg(options::OPT__precompile))) {
FinalPhase = phases::Precompile;
// -{fsyntax-only,-analyze,emit-ast} only run up to the compiler.
} else if ((PhaseArg = DAL.getLastArg(options::OPT_fsyntax_only)) ||
(PhaseArg = DAL.getLastArg(options::OPT_module_file_info)) ||
(PhaseArg = DAL.getLastArg(options::OPT_verify_pch)) ||
(PhaseArg = DAL.getLastArg(options::OPT_rewrite_objc)) ||
(PhaseArg = DAL.getLastArg(options::OPT_rewrite_legacy_objc)) ||
(PhaseArg = DAL.getLastArg(options::OPT__migrate)) ||
(PhaseArg = DAL.getLastArg(options::OPT__analyze,
2012-03-07 07:14:35 +08:00
options::OPT__analyze_auto)) ||
(PhaseArg = DAL.getLastArg(options::OPT_emit_ast))) {
FinalPhase = phases::Compile;
// -S only runs up to the backend.
} else if ((PhaseArg = DAL.getLastArg(options::OPT_S))) {
FinalPhase = phases::Backend;
// -c compilation only runs up to the assembler.
} else if ((PhaseArg = DAL.getLastArg(options::OPT_c))) {
FinalPhase = phases::Assemble;
// Otherwise do everything.
} else
FinalPhase = phases::Link;
if (FinalPhaseArg)
*FinalPhaseArg = PhaseArg;
return FinalPhase;
}
static Arg *MakeInputArg(DerivedArgList &Args, OptTable &Opts,
StringRef Value) {
Arg *A = new Arg(Opts.getOption(options::OPT_INPUT), Value,
Args.getBaseArgs().MakeIndex(Value), Value.data());
Args.AddSynthesizedArg(A);
A->claim();
return A;
}
DerivedArgList *Driver::TranslateInputArgs(const InputArgList &Args) const {
DerivedArgList *DAL = new DerivedArgList(Args);
bool HasNostdlib = Args.hasArg(options::OPT_nostdlib);
bool HasNodefaultlib = Args.hasArg(options::OPT_nodefaultlibs);
for (Arg *A : Args) {
// Unfortunately, we have to parse some forwarding options (-Xassembler,
// -Xlinker, -Xpreprocessor) because we either integrate their functionality
// (assembler and preprocessor), or bypass a previous driver ('collect2').
// Rewrite linker options, to replace --no-demangle with a custom internal
// option.
if ((A->getOption().matches(options::OPT_Wl_COMMA) ||
A->getOption().matches(options::OPT_Xlinker)) &&
A->containsValue("--no-demangle")) {
// Add the rewritten no-demangle argument.
DAL->AddFlagArg(A, Opts->getOption(options::OPT_Z_Xlinker__no_demangle));
// Add the remaining values as Xlinker arguments.
for (StringRef Val : A->getValues())
if (Val != "--no-demangle")
DAL->AddSeparateArg(A, Opts->getOption(options::OPT_Xlinker), Val);
continue;
}
// Rewrite preprocessor options, to replace -Wp,-MD,FOO which is used by
// some build systems. We don't try to be complete here because we don't
// care to encourage this usage model.
if (A->getOption().matches(options::OPT_Wp_COMMA) &&
(A->getValue(0) == StringRef("-MD") ||
A->getValue(0) == StringRef("-MMD"))) {
// Rewrite to -MD/-MMD along with -MF.
if (A->getValue(0) == StringRef("-MD"))
DAL->AddFlagArg(A, Opts->getOption(options::OPT_MD));
else
DAL->AddFlagArg(A, Opts->getOption(options::OPT_MMD));
if (A->getNumValues() == 2)
DAL->AddSeparateArg(A, Opts->getOption(options::OPT_MF),
A->getValue(1));
continue;
}
// Rewrite reserved library names.
if (A->getOption().matches(options::OPT_l)) {
StringRef Value = A->getValue();
// Rewrite unless -nostdlib is present.
if (!HasNostdlib && !HasNodefaultlib && Value == "stdc++") {
DAL->AddFlagArg(A, Opts->getOption(options::OPT_Z_reserved_lib_stdcxx));
continue;
}
// Rewrite unconditionally.
if (Value == "cc_kext") {
DAL->AddFlagArg(A, Opts->getOption(options::OPT_Z_reserved_lib_cckext));
continue;
}
}
// Pick up inputs via the -- option.
if (A->getOption().matches(options::OPT__DASH_DASH)) {
A->claim();
for (StringRef Val : A->getValues())
DAL->append(MakeInputArg(*DAL, *Opts, Val));
continue;
}
DAL->append(A);
}
// Enforce -static if -miamcu is present.
if (Args.hasFlag(options::OPT_miamcu, options::OPT_mno_iamcu, false))
DAL->AddFlagArg(0, Opts->getOption(options::OPT_static));
// Add a default value of -mlinker-version=, if one was given and the user
// didn't specify one.
#if defined(HOST_LINK_VERSION)
if (!Args.hasArg(options::OPT_mlinker_version_EQ) &&
strlen(HOST_LINK_VERSION) > 0) {
DAL->AddJoinedArg(0, Opts->getOption(options::OPT_mlinker_version_EQ),
HOST_LINK_VERSION);
DAL->getLastArg(options::OPT_mlinker_version_EQ)->claim();
}
#endif
return DAL;
}
/// \brief Compute target triple from args.
///
/// This routine provides the logic to compute a target triple from various
/// args passed to the driver and the default triple string.
static llvm::Triple computeTargetTriple(const Driver &D,
StringRef DefaultTargetTriple,
const ArgList &Args,
StringRef DarwinArchName = "") {
// FIXME: Already done in Compilation *Driver::BuildCompilation
if (const Arg *A = Args.getLastArg(options::OPT_target))
DefaultTargetTriple = A->getValue();
llvm::Triple Target(llvm::Triple::normalize(DefaultTargetTriple));
// Handle Apple-specific options available here.
if (Target.isOSBinFormatMachO()) {
// If an explict Darwin arch name is given, that trumps all.
if (!DarwinArchName.empty()) {
tools::darwin::setTripleTypeForMachOArchName(Target, DarwinArchName);
return Target;
}
// Handle the Darwin '-arch' flag.
if (Arg *A = Args.getLastArg(options::OPT_arch)) {
StringRef ArchName = A->getValue();
tools::darwin::setTripleTypeForMachOArchName(Target, ArchName);
}
}
// Handle pseudo-target flags '-mlittle-endian'/'-EL' and
// '-mbig-endian'/'-EB'.
if (Arg *A = Args.getLastArg(options::OPT_mlittle_endian,
options::OPT_mbig_endian)) {
if (A->getOption().matches(options::OPT_mlittle_endian)) {
llvm::Triple LE = Target.getLittleEndianArchVariant();
if (LE.getArch() != llvm::Triple::UnknownArch)
Target = std::move(LE);
} else {
llvm::Triple BE = Target.getBigEndianArchVariant();
if (BE.getArch() != llvm::Triple::UnknownArch)
Target = std::move(BE);
}
}
// Skip further flag support on OSes which don't support '-m32' or '-m64'.
if (Target.getArch() == llvm::Triple::tce ||
Target.getOS() == llvm::Triple::Minix)
return Target;
// Handle pseudo-target flags '-m64', '-mx32', '-m32' and '-m16'.
Arg *A = Args.getLastArg(options::OPT_m64, options::OPT_mx32,
options::OPT_m32, options::OPT_m16);
if (A) {
llvm::Triple::ArchType AT = llvm::Triple::UnknownArch;
if (A->getOption().matches(options::OPT_m64)) {
AT = Target.get64BitArchVariant().getArch();
if (Target.getEnvironment() == llvm::Triple::GNUX32)
Target.setEnvironment(llvm::Triple::GNU);
} else if (A->getOption().matches(options::OPT_mx32) &&
Target.get64BitArchVariant().getArch() == llvm::Triple::x86_64) {
AT = llvm::Triple::x86_64;
Target.setEnvironment(llvm::Triple::GNUX32);
} else if (A->getOption().matches(options::OPT_m32)) {
AT = Target.get32BitArchVariant().getArch();
if (Target.getEnvironment() == llvm::Triple::GNUX32)
Target.setEnvironment(llvm::Triple::GNU);
} else if (A->getOption().matches(options::OPT_m16) &&
Target.get32BitArchVariant().getArch() == llvm::Triple::x86) {
AT = llvm::Triple::x86;
Target.setEnvironment(llvm::Triple::CODE16);
}
if (AT != llvm::Triple::UnknownArch && AT != Target.getArch())
Target.setArch(AT);
}
// Handle -miamcu flag.
if (Args.hasFlag(options::OPT_miamcu, options::OPT_mno_iamcu, false)) {
if (Target.get32BitArchVariant().getArch() != llvm::Triple::x86)
D.Diag(diag::err_drv_unsupported_opt_for_target) << "-miamcu"
<< Target.str();
if (A && !A->getOption().matches(options::OPT_m32))
D.Diag(diag::err_drv_argument_not_allowed_with)
<< "-miamcu" << A->getBaseArg().getAsString(Args);
Target.setArch(llvm::Triple::x86);
Target.setArchName("i586");
Target.setEnvironment(llvm::Triple::UnknownEnvironment);
Target.setEnvironmentName("");
Target.setOS(llvm::Triple::ELFIAMCU);
Target.setVendor(llvm::Triple::UnknownVendor);
Target.setVendorName("intel");
}
return Target;
}
// \brief Parse the LTO options and record the type of LTO compilation
// based on which -f(no-)?lto(=.*)? option occurs last.
void Driver::setLTOMode(const llvm::opt::ArgList &Args) {
LTOMode = LTOK_None;
if (!Args.hasFlag(options::OPT_flto, options::OPT_flto_EQ,
options::OPT_fno_lto, false))
return;
StringRef LTOName("full");
const Arg *A = Args.getLastArg(options::OPT_flto_EQ);
if (A)
LTOName = A->getValue();
LTOMode = llvm::StringSwitch<LTOKind>(LTOName)
.Case("full", LTOK_Full)
.Case("thin", LTOK_Thin)
.Default(LTOK_Unknown);
if (LTOMode == LTOK_Unknown) {
assert(A);
Diag(diag::err_drv_unsupported_option_argument) << A->getOption().getName()
<< A->getValue();
}
}
/// Compute the desired OpenMP runtime from the flags provided.
Driver::OpenMPRuntimeKind Driver::getOpenMPRuntime(const ArgList &Args) const {
StringRef RuntimeName(CLANG_DEFAULT_OPENMP_RUNTIME);
const Arg *A = Args.getLastArg(options::OPT_fopenmp_EQ);
if (A)
RuntimeName = A->getValue();
auto RT = llvm::StringSwitch<OpenMPRuntimeKind>(RuntimeName)
.Case("libomp", OMPRT_OMP)
.Case("libgomp", OMPRT_GOMP)
.Case("libiomp5", OMPRT_IOMP5)
.Default(OMPRT_Unknown);
if (RT == OMPRT_Unknown) {
if (A)
Diag(diag::err_drv_unsupported_option_argument)
<< A->getOption().getName() << A->getValue();
else
// FIXME: We could use a nicer diagnostic here.
Diag(diag::err_drv_unsupported_opt) << "-fopenmp";
}
return RT;
}
void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
InputList &Inputs) {
//
// CUDA
//
// We need to generate a CUDA toolchain if any of the inputs has a CUDA type.
if (llvm::any_of(Inputs, [](std::pair<types::ID, const llvm::opt::Arg *> &I) {
return types::isCuda(I.first);
})) {
const ToolChain *HostTC = C.getSingleOffloadToolChain<Action::OFK_Host>();
const llvm::Triple &HostTriple = HostTC->getTriple();
llvm::Triple CudaTriple(HostTriple.isArch64Bit() ? "nvptx64-nvidia-cuda"
: "nvptx-nvidia-cuda");
// Use the CUDA and host triples as the key into the ToolChains map, because
// the device toolchain we create depends on both.
auto &CudaTC = ToolChains[CudaTriple.str() + "/" + HostTriple.str()];
if (!CudaTC) {
CudaTC = llvm::make_unique<toolchains::CudaToolChain>(
*this, CudaTriple, *HostTC, C.getInputArgs());
}
C.addOffloadDeviceToolChain(CudaTC.get(), Action::OFK_Cuda);
}
//
// OpenMP
//
// We need to generate an OpenMP toolchain if the user specified targets with
// the -fopenmp-targets option.
if (Arg *OpenMPTargets =
C.getInputArgs().getLastArg(options::OPT_fopenmp_targets_EQ)) {
if (OpenMPTargets->getNumValues()) {
// We expect that -fopenmp-targets is always used in conjunction with the
// option -fopenmp specifying a valid runtime with offloading support,
// i.e. libomp or libiomp.
bool HasValidOpenMPRuntime = C.getInputArgs().hasFlag(
options::OPT_fopenmp, options::OPT_fopenmp_EQ,
options::OPT_fno_openmp, false);
if (HasValidOpenMPRuntime) {
OpenMPRuntimeKind OpenMPKind = getOpenMPRuntime(C.getInputArgs());
HasValidOpenMPRuntime =
OpenMPKind == OMPRT_OMP || OpenMPKind == OMPRT_IOMP5;
}
if (HasValidOpenMPRuntime) {
llvm::StringMap<const char *> FoundNormalizedTriples;
for (const char *Val : OpenMPTargets->getValues()) {
llvm::Triple TT(Val);
std::string NormalizedName = TT.normalize();
// Make sure we don't have a duplicate triple.
auto Duplicate = FoundNormalizedTriples.find(NormalizedName);
if (Duplicate != FoundNormalizedTriples.end()) {
Diag(clang::diag::warn_drv_omp_offload_target_duplicate)
<< Val << Duplicate->second;
continue;
}
// Store the current triple so that we can check for duplicates in the
// following iterations.
FoundNormalizedTriples[NormalizedName] = Val;
// If the specified target is invalid, emit a diagnostic.
if (TT.getArch() == llvm::Triple::UnknownArch)
Diag(clang::diag::err_drv_invalid_omp_target) << Val;
else {
const ToolChain &TC = getToolChain(C.getInputArgs(), TT);
C.addOffloadDeviceToolChain(&TC, Action::OFK_OpenMP);
}
}
} else
Diag(clang::diag::err_drv_expecting_fopenmp_with_fopenmp_targets);
} else
Diag(clang::diag::warn_drv_empty_joined_argument)
<< OpenMPTargets->getAsString(C.getInputArgs());
}
//
// TODO: Add support for other offloading programming models here.
//
return;
}
Compilation *Driver::BuildCompilation(ArrayRef<const char *> ArgList) {
llvm::PrettyStackTraceString CrashInfo("Compilation construction");
2011-08-18 06:59:59 +08:00
// FIXME: Handle environment options which affect driver behavior, somewhere
2012-03-13 05:24:57 +08:00
// (client?). GCC_EXEC_PREFIX, LPATH, CC_PRINT_OPTIONS.
if (Optional<std::string> CompilerPathValue =
llvm::sys::Process::GetEnv("COMPILER_PATH")) {
StringRef CompilerPath = *CompilerPathValue;
while (!CompilerPath.empty()) {
std::pair<StringRef, StringRef> Split =
CompilerPath.split(llvm::sys::EnvPathSeparator);
PrefixDirs.push_back(Split.first);
CompilerPath = Split.second;
}
}
// We look for the driver mode option early, because the mode can affect
// how other options are parsed.
ParseDriverMode(ClangExecutable, ArgList.slice(1));
// FIXME: What are we going to do with -V and -b?
// FIXME: This stuff needs to go into the Compilation, not the driver.
bool CCCPrintPhases;
InputArgList Args = ParseArgStrings(ArgList.slice(1));
// Silence driver warnings if requested
Diags.setIgnoreAllWarnings(Args.hasArg(options::OPT_w));
// -no-canonical-prefixes is used very early in main.
Args.ClaimAllArgs(options::OPT_no_canonical_prefixes);
// Ignore -pipe.
Args.ClaimAllArgs(options::OPT_pipe);
// Extract -ccc args.
//
// FIXME: We need to figure out where this behavior should live. Most of it
// should be outside in the client; the parts that aren't should have proper
// options, either by introducing new ones or by overloading gcc ones like -V
// or -b.
CCCPrintPhases = Args.hasArg(options::OPT_ccc_print_phases);
CCCPrintBindings = Args.hasArg(options::OPT_ccc_print_bindings);
if (const Arg *A = Args.getLastArg(options::OPT_ccc_gcc_name))
CCCGenericGCCName = A->getValue();
CCCUsePCH =
Args.hasFlag(options::OPT_ccc_pch_is_pch, options::OPT_ccc_pch_is_pth);
// FIXME: DefaultTargetTriple is used by the target-prefixed calls to as/ld
// and getToolChain is const.
if (IsCLMode()) {
// clang-cl targets MSVC-style Win32.
llvm::Triple T(DefaultTargetTriple);
T.setOS(llvm::Triple::Win32);
T.setVendor(llvm::Triple::PC);
T.setEnvironment(llvm::Triple::MSVC);
DefaultTargetTriple = T.str();
}
if (const Arg *A = Args.getLastArg(options::OPT_target))
DefaultTargetTriple = A->getValue();
if (const Arg *A = Args.getLastArg(options::OPT_ccc_install_dir))
Dir = InstalledDir = A->getValue();
for (const Arg *A : Args.filtered(options::OPT_B)) {
A->claim();
PrefixDirs.push_back(A->getValue(0));
}
if (const Arg *A = Args.getLastArg(options::OPT__sysroot_EQ))
SysRoot = A->getValue();
if (const Arg *A = Args.getLastArg(options::OPT__dyld_prefix_EQ))
DyldPrefix = A->getValue();
if (Args.hasArg(options::OPT_nostdlib))
UseStdLib = false;
if (const Arg *A = Args.getLastArg(options::OPT_resource_dir))
ResourceDir = A->getValue();
if (const Arg *A = Args.getLastArg(options::OPT_save_temps_EQ)) {
SaveTemps = llvm::StringSwitch<SaveTempsMode>(A->getValue())
.Case("cwd", SaveTempsCwd)
.Case("obj", SaveTempsObj)
.Default(SaveTempsCwd);
}
setLTOMode(Args);
// Process -fembed-bitcode= flags.
if (Arg *A = Args.getLastArg(options::OPT_fembed_bitcode_EQ)) {
StringRef Name = A->getValue();
unsigned Model = llvm::StringSwitch<unsigned>(Name)
.Case("off", EmbedNone)
.Case("all", EmbedBitcode)
.Case("bitcode", EmbedBitcode)
.Case("marker", EmbedMarker)
.Default(~0U);
if (Model == ~0U) {
Diags.Report(diag::err_drv_invalid_value) << A->getAsString(Args)
<< Name;
} else
BitcodeEmbed = static_cast<BitcodeEmbedMode>(Model);
}
std::unique_ptr<llvm::opt::InputArgList> UArgs =
llvm::make_unique<InputArgList>(std::move(Args));
// Perform the default argument translations.
DerivedArgList *TranslatedArgs = TranslateInputArgs(*UArgs);
// Owned by the host.
const ToolChain &TC = getToolChain(
*UArgs, computeTargetTriple(*this, DefaultTargetTriple, *UArgs));
// The compilation takes ownership of Args.
Compilation *C = new Compilation(*this, TC, UArgs.release(), TranslatedArgs);
if (!HandleImmediateArgs(*C))
return C;
// Construct the list of inputs.
InputList Inputs;
BuildInputs(C->getDefaultToolChain(), *TranslatedArgs, Inputs);
// Populate the tool chains for the offloading devices, if any.
CreateOffloadingDeviceToolChains(*C, Inputs);
// Construct the list of abstract actions to perform for this compilation. On
// MachO targets this uses the driver-driver and universal actions.
if (TC.getTriple().isOSBinFormatMachO())
BuildUniversalActions(*C, C->getDefaultToolChain(), Inputs);
else
BuildActions(*C, C->getArgs(), Inputs, C->getActions());
if (CCCPrintPhases) {
PrintActions(*C);
return C;
}
BuildJobs(*C);
return C;
}
static void printArgList(raw_ostream &OS, const llvm::opt::ArgList &Args) {
llvm::opt::ArgStringList ASL;
for (const auto *A : Args)
A->render(Args, ASL);
for (auto I = ASL.begin(), E = ASL.end(); I != E; ++I) {
if (I != ASL.begin())
OS << ' ';
Command::printArg(OS, *I, true);
}
OS << '\n';
}
bool Driver::getCrashDiagnosticFile(StringRef ReproCrashFilename,
SmallString<128> &CrashDiagDir) {
using namespace llvm::sys;
assert(llvm::Triple(llvm::sys::getProcessTriple()).isOSDarwin() &&
"Only knows about .crash files on Darwin");
// The .crash file can be found on at ~/Library/Logs/DiagnosticReports/
// (or /Library/Logs/DiagnosticReports for root) and has the filename pattern
// clang-<VERSION>_<YYYY-MM-DD-HHMMSS>_<hostname>.crash.
path::home_directory(CrashDiagDir);
if (CrashDiagDir.startswith("/var/root"))
CrashDiagDir = "/";
path::append(CrashDiagDir, "Library/Logs/DiagnosticReports");
int PID =
#if LLVM_ON_UNIX
getpid();
#else
0;
#endif
std::error_code EC;
fs::file_status FileStatus;
TimePoint<> LastAccessTime;
SmallString<128> CrashFilePath;
// Lookup the .crash files and get the one generated by a subprocess spawned
// by this driver invocation.
for (fs::directory_iterator File(CrashDiagDir, EC), FileEnd;
File != FileEnd && !EC; File.increment(EC)) {
StringRef FileName = path::filename(File->path());
if (!FileName.startswith(Name))
continue;
if (fs::status(File->path(), FileStatus))
continue;
llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> CrashFile =
llvm::MemoryBuffer::getFile(File->path());
if (!CrashFile)
continue;
// The first line should start with "Process:", otherwise this isn't a real
// .crash file.
StringRef Data = CrashFile.get()->getBuffer();
if (!Data.startswith("Process:"))
continue;
// Parse parent process pid line, e.g: "Parent Process: clang-4.0 [79141]"
size_t ParentProcPos = Data.find("Parent Process:");
if (ParentProcPos == StringRef::npos)
continue;
size_t LineEnd = Data.find_first_of("\n", ParentProcPos);
if (LineEnd == StringRef::npos)
continue;
StringRef ParentProcess = Data.slice(ParentProcPos+15, LineEnd).trim();
int OpenBracket = -1, CloseBracket = -1;
for (size_t i = 0, e = ParentProcess.size(); i < e; ++i) {
if (ParentProcess[i] == '[')
OpenBracket = i;
if (ParentProcess[i] == ']')
CloseBracket = i;
}
// Extract the parent process PID from the .crash file and check whether
// it matches this driver invocation pid.
int CrashPID;
if (OpenBracket < 0 || CloseBracket < 0 ||
ParentProcess.slice(OpenBracket + 1, CloseBracket)
.getAsInteger(10, CrashPID) || CrashPID != PID) {
continue;
}
// Found a .crash file matching the driver pid. To avoid getting an older
// and misleading crash file, continue looking for the most recent.
// FIXME: the driver can dispatch multiple cc1 invocations, leading to
// multiple crashes poiting to the same parent process. Since the driver
// does not collect pid information for the dispatched invocation there's
// currently no way to distinguish among them.
const auto FileAccessTime = FileStatus.getLastModificationTime();
if (FileAccessTime > LastAccessTime) {
CrashFilePath.assign(File->path());
LastAccessTime = FileAccessTime;
}
}
// If found, copy it over to the location of other reproducer files.
if (!CrashFilePath.empty()) {
EC = fs::copy_file(CrashFilePath, ReproCrashFilename);
if (EC)
return false;
return true;
}
return false;
}
2011-08-18 06:59:59 +08:00
// When clang crashes, produce diagnostic information including the fully
// preprocessed source file(s). Request that the developer attach the
// diagnostic information to a bug report.
void Driver::generateCompilationDiagnostics(Compilation &C,
const Command &FailingCommand) {
if (C.getArgs().hasArg(options::OPT_fno_crash_diagnostics))
return;
// Don't try to generate diagnostics for link or dsymutil jobs.
if (FailingCommand.getCreator().isLinkJob() ||
FailingCommand.getCreator().isDsymutilJob())
return;
// Print the version of the compiler.
PrintVersion(C, llvm::errs());
Diag(clang::diag::note_drv_command_failed_diag_msg)
<< "PLEASE submit a bug report to " BUG_REPORT_URL " and include the "
"crash backtrace, preprocessed source, and associated run script.";
// Suppress driver output and emit preprocessor output to temp file.
Mode = CPPMode;
CCGenDiagnostics = true;
// Save the original job command(s).
Command Cmd = FailingCommand;
// Keep track of whether we produce any errors while trying to produce
// preprocessed sources.
DiagnosticErrorTrap Trap(Diags);
// Suppress tool output.
C.initCompilationForDiagnostics();
// Construct the list of inputs.
InputList Inputs;
BuildInputs(C.getDefaultToolChain(), C.getArgs(), Inputs);
for (InputList::iterator it = Inputs.begin(), ie = Inputs.end(); it != ie;) {
bool IgnoreInput = false;
// Ignore input from stdin or any inputs that cannot be preprocessed.
// Check type first as not all linker inputs have a value.
if (types::getPreprocessedType(it->first) == types::TY_INVALID) {
IgnoreInput = true;
} else if (!strcmp(it->second->getValue(), "-")) {
Diag(clang::diag::note_drv_command_failed_diag_msg)
<< "Error generating preprocessed source(s) - "
"ignoring input from stdin.";
IgnoreInput = true;
}
if (IgnoreInput) {
it = Inputs.erase(it);
ie = Inputs.end();
} else {
++it;
}
}
if (Inputs.empty()) {
Diag(clang::diag::note_drv_command_failed_diag_msg)
<< "Error generating preprocessed source(s) - "
"no preprocessable inputs.";
return;
}
// Don't attempt to generate preprocessed files if multiple -arch options are
// used, unless they're all duplicates.
llvm::StringSet<> ArchNames;
for (const Arg *A : C.getArgs()) {
if (A->getOption().matches(options::OPT_arch)) {
StringRef ArchName = A->getValue();
ArchNames.insert(ArchName);
}
}
if (ArchNames.size() > 1) {
Diag(clang::diag::note_drv_command_failed_diag_msg)
<< "Error generating preprocessed source(s) - cannot generate "
"preprocessed source with multiple -arch options.";
return;
}
// Construct the list of abstract actions to perform for this compilation. On
// Darwin OSes this uses the driver-driver and builds universal actions.
const ToolChain &TC = C.getDefaultToolChain();
if (TC.getTriple().isOSBinFormatMachO())
BuildUniversalActions(C, TC, Inputs);
else
BuildActions(C, C.getArgs(), Inputs, C.getActions());
BuildJobs(C);
// If there were errors building the compilation, quit now.
if (Trap.hasErrorOccurred()) {
Diag(clang::diag::note_drv_command_failed_diag_msg)
<< "Error generating preprocessed source(s).";
return;
}
// Generate preprocessed output.
SmallVector<std::pair<int, const Command *>, 4> FailingCommands;
C.ExecuteJobs(C.getJobs(), FailingCommands);
// If any of the preprocessing commands failed, clean up and exit.
if (!FailingCommands.empty()) {
if (!isSaveTempsEnabled())
C.CleanupFileList(C.getTempFiles(), true);
Diag(clang::diag::note_drv_command_failed_diag_msg)
<< "Error generating preprocessed source(s).";
return;
}
const ArgStringList &TempFiles = C.getTempFiles();
if (TempFiles.empty()) {
Diag(clang::diag::note_drv_command_failed_diag_msg)
<< "Error generating preprocessed source(s).";
return;
}
Diag(clang::diag::note_drv_command_failed_diag_msg)
<< "\n********************\n\n"
"PLEASE ATTACH THE FOLLOWING FILES TO THE BUG REPORT:\n"
"Preprocessed source(s) and associated run script(s) are located at:";
SmallString<128> VFS;
SmallString<128> ReproCrashFilename;
for (const char *TempFile : TempFiles) {
Diag(clang::diag::note_drv_command_failed_diag_msg) << TempFile;
if (ReproCrashFilename.empty()) {
ReproCrashFilename = TempFile;
llvm::sys::path::replace_extension(ReproCrashFilename, ".crash");
}
if (StringRef(TempFile).endswith(".cache")) {
// In some cases (modules) we'll dump extra data to help with reproducing
// the crash into a directory next to the output.
VFS = llvm::sys::path::filename(TempFile);
llvm::sys::path::append(VFS, "vfs", "vfs.yaml");
}
}
// Assume associated files are based off of the first temporary file.
CrashReportInfo CrashInfo(TempFiles[0], VFS);
std::string Script = CrashInfo.Filename.rsplit('.').first.str() + ".sh";
std::error_code EC;
llvm::raw_fd_ostream ScriptOS(Script, EC, llvm::sys::fs::F_Excl);
if (EC) {
Diag(clang::diag::note_drv_command_failed_diag_msg)
<< "Error generating run script: " + Script + " " + EC.message();
} else {
ScriptOS << "# Crash reproducer for " << getClangFullVersion() << "\n"
<< "# Driver args: ";
printArgList(ScriptOS, C.getInputArgs());
ScriptOS << "# Original command: ";
Cmd.Print(ScriptOS, "\n", /*Quote=*/true);
Cmd.Print(ScriptOS, "\n", /*Quote=*/true, &CrashInfo);
Diag(clang::diag::note_drv_command_failed_diag_msg) << Script;
}
// On darwin, provide information about the .crash diagnostic report.
if (llvm::Triple(llvm::sys::getProcessTriple()).isOSDarwin()) {
SmallString<128> CrashDiagDir;
if (getCrashDiagnosticFile(ReproCrashFilename, CrashDiagDir)) {
Diag(clang::diag::note_drv_command_failed_diag_msg)
<< ReproCrashFilename.str();
} else { // Suggest a directory for the user to look for .crash files.
llvm::sys::path::append(CrashDiagDir, Name);
CrashDiagDir += "_<YYYY-MM-DD-HHMMSS>_<hostname>.crash";
Diag(clang::diag::note_drv_command_failed_diag_msg)
<< "Crash backtrace is located in";
Diag(clang::diag::note_drv_command_failed_diag_msg)
<< CrashDiagDir.str();
Diag(clang::diag::note_drv_command_failed_diag_msg)
<< "(choose the .crash file that corresponds to your crash)";
}
}
for (const auto &A : C.getArgs().filtered(options::OPT_frewrite_map_file,
options::OPT_frewrite_map_file_EQ))
Diag(clang::diag::note_drv_command_failed_diag_msg) << A->getValue();
Diag(clang::diag::note_drv_command_failed_diag_msg)
<< "\n\n********************";
}
void Driver::setUpResponseFiles(Compilation &C, Command &Cmd) {
// Since commandLineFitsWithinSystemLimits() may underestimate system's capacity
Teach Clang how to use response files when calling other tools Patch by Rafael Auler! This patch addresses PR15171 and teaches Clang how to call other tools with response files, when the command line exceeds system limits. This is a problem for Windows systems, whose maximum command-line length is 32kb. I introduce the concept of "response file support" for each Tool object. A given Tool may have full support for response files (e.g. MSVC's link.exe) or only support file names inside response files, but no flags (e.g. Apple's ld64, as commented in PR15171), or no support at all (the default case). Therefore, if you implement a toolchain in the clang driver and you want clang to be able to use response files in your tools, you must override a method (getReponseFileSupport()) to tell so. I designed it to support different kinds of tools and internationalisation needs: - VS response files ( UTF-16 ) - GNU tools ( uses system's current code page, windows' legacy intl. support, with escaped backslashes. On unix, fallback to UTF-8 ) - Clang itself ( UTF-16 on windows, UTF-8 on unix ) - ld64 response files ( only a limited file list, UTF-8 on unix ) With this design, I was able to test input file names with spaces and international characters for Windows. When the linker input is large enough, it creates a response file with the correct encoding. On a Mac, to test ld64, I temporarily changed Clang's behavior to always use response files regardless of the command size limit (avoiding using huge command line inputs). I tested clang with the LLVM test suite (compiling benchmarks) and it did fine. Test Plan: A LIT test that tests proper response files support. This is tricky, since, for Unix systems, we need a 2MB response file, otherwise Clang will simply use regular arguments instead of a response file. To do this, my LIT test generate the file on the fly by cloning many -DTEST parameters until we have a 2MB file. I found out that processing 2MB of arguments is pretty slow, it takes 1 minute using my notebook in a debug build, or 10s in a Release build. Therefore, I also added "REQUIRES: long_tests", so it will only run when the user wants to run long tests. In the full discussion in http://lists.cs.uiuc.edu/pipermail/llvm-commits/Week-of-Mon-20130408/171463.html, Rafael Espindola discusses a proper way to test llvm::sys::argumentsFitWithinSystemLimits(), and, there, Chandler suggests to use 10 times the current system limit (20MB resp file), so we guarantee that the system will always use response file, even if a new linux comes up that can handle a few more bytes of arguments. However, by testing with a 20MB resp file, the test takes long 8 minutes just to perform a silly check to see if the driver will use a response file. I found it to be unreasonable. Thus, I discarded this approach and uses a 2MB response file, which should be enough. Reviewers: asl, rafael, silvas Reviewed By: silvas Subscribers: silvas, rnk, thakis, cfe-commits Differential Revision: http://reviews.llvm.org/D4897 llvm-svn: 217792
2014-09-16 01:45:39 +08:00
// if the tool does not support response files, there is a chance/ that things
// will just work without a response file, so we silently just skip it.
if (Cmd.getCreator().getResponseFilesSupport() == Tool::RF_None ||
llvm::sys::commandLineFitsWithinSystemLimits(Cmd.getExecutable(), Cmd.getArguments()))
Teach Clang how to use response files when calling other tools Patch by Rafael Auler! This patch addresses PR15171 and teaches Clang how to call other tools with response files, when the command line exceeds system limits. This is a problem for Windows systems, whose maximum command-line length is 32kb. I introduce the concept of "response file support" for each Tool object. A given Tool may have full support for response files (e.g. MSVC's link.exe) or only support file names inside response files, but no flags (e.g. Apple's ld64, as commented in PR15171), or no support at all (the default case). Therefore, if you implement a toolchain in the clang driver and you want clang to be able to use response files in your tools, you must override a method (getReponseFileSupport()) to tell so. I designed it to support different kinds of tools and internationalisation needs: - VS response files ( UTF-16 ) - GNU tools ( uses system's current code page, windows' legacy intl. support, with escaped backslashes. On unix, fallback to UTF-8 ) - Clang itself ( UTF-16 on windows, UTF-8 on unix ) - ld64 response files ( only a limited file list, UTF-8 on unix ) With this design, I was able to test input file names with spaces and international characters for Windows. When the linker input is large enough, it creates a response file with the correct encoding. On a Mac, to test ld64, I temporarily changed Clang's behavior to always use response files regardless of the command size limit (avoiding using huge command line inputs). I tested clang with the LLVM test suite (compiling benchmarks) and it did fine. Test Plan: A LIT test that tests proper response files support. This is tricky, since, for Unix systems, we need a 2MB response file, otherwise Clang will simply use regular arguments instead of a response file. To do this, my LIT test generate the file on the fly by cloning many -DTEST parameters until we have a 2MB file. I found out that processing 2MB of arguments is pretty slow, it takes 1 minute using my notebook in a debug build, or 10s in a Release build. Therefore, I also added "REQUIRES: long_tests", so it will only run when the user wants to run long tests. In the full discussion in http://lists.cs.uiuc.edu/pipermail/llvm-commits/Week-of-Mon-20130408/171463.html, Rafael Espindola discusses a proper way to test llvm::sys::argumentsFitWithinSystemLimits(), and, there, Chandler suggests to use 10 times the current system limit (20MB resp file), so we guarantee that the system will always use response file, even if a new linux comes up that can handle a few more bytes of arguments. However, by testing with a 20MB resp file, the test takes long 8 minutes just to perform a silly check to see if the driver will use a response file. I found it to be unreasonable. Thus, I discarded this approach and uses a 2MB response file, which should be enough. Reviewers: asl, rafael, silvas Reviewed By: silvas Subscribers: silvas, rnk, thakis, cfe-commits Differential Revision: http://reviews.llvm.org/D4897 llvm-svn: 217792
2014-09-16 01:45:39 +08:00
return;
std::string TmpName = GetTemporaryPath("response", "txt");
Cmd.setResponseFile(C.addTempFile(C.getArgs().MakeArgString(TmpName)));
Teach Clang how to use response files when calling other tools Patch by Rafael Auler! This patch addresses PR15171 and teaches Clang how to call other tools with response files, when the command line exceeds system limits. This is a problem for Windows systems, whose maximum command-line length is 32kb. I introduce the concept of "response file support" for each Tool object. A given Tool may have full support for response files (e.g. MSVC's link.exe) or only support file names inside response files, but no flags (e.g. Apple's ld64, as commented in PR15171), or no support at all (the default case). Therefore, if you implement a toolchain in the clang driver and you want clang to be able to use response files in your tools, you must override a method (getReponseFileSupport()) to tell so. I designed it to support different kinds of tools and internationalisation needs: - VS response files ( UTF-16 ) - GNU tools ( uses system's current code page, windows' legacy intl. support, with escaped backslashes. On unix, fallback to UTF-8 ) - Clang itself ( UTF-16 on windows, UTF-8 on unix ) - ld64 response files ( only a limited file list, UTF-8 on unix ) With this design, I was able to test input file names with spaces and international characters for Windows. When the linker input is large enough, it creates a response file with the correct encoding. On a Mac, to test ld64, I temporarily changed Clang's behavior to always use response files regardless of the command size limit (avoiding using huge command line inputs). I tested clang with the LLVM test suite (compiling benchmarks) and it did fine. Test Plan: A LIT test that tests proper response files support. This is tricky, since, for Unix systems, we need a 2MB response file, otherwise Clang will simply use regular arguments instead of a response file. To do this, my LIT test generate the file on the fly by cloning many -DTEST parameters until we have a 2MB file. I found out that processing 2MB of arguments is pretty slow, it takes 1 minute using my notebook in a debug build, or 10s in a Release build. Therefore, I also added "REQUIRES: long_tests", so it will only run when the user wants to run long tests. In the full discussion in http://lists.cs.uiuc.edu/pipermail/llvm-commits/Week-of-Mon-20130408/171463.html, Rafael Espindola discusses a proper way to test llvm::sys::argumentsFitWithinSystemLimits(), and, there, Chandler suggests to use 10 times the current system limit (20MB resp file), so we guarantee that the system will always use response file, even if a new linux comes up that can handle a few more bytes of arguments. However, by testing with a 20MB resp file, the test takes long 8 minutes just to perform a silly check to see if the driver will use a response file. I found it to be unreasonable. Thus, I discarded this approach and uses a 2MB response file, which should be enough. Reviewers: asl, rafael, silvas Reviewed By: silvas Subscribers: silvas, rnk, thakis, cfe-commits Differential Revision: http://reviews.llvm.org/D4897 llvm-svn: 217792
2014-09-16 01:45:39 +08:00
}
int Driver::ExecuteCompilation(
Compilation &C,
SmallVectorImpl<std::pair<int, const Command *>> &FailingCommands) {
// Just print if -### was present.
if (C.getArgs().hasArg(options::OPT__HASH_HASH_HASH)) {
C.getJobs().Print(llvm::errs(), "\n", true);
return 0;
}
// If there were errors building the compilation, quit now.
if (Diags.hasErrorOccurred())
return 1;
Teach Clang how to use response files when calling other tools Patch by Rafael Auler! This patch addresses PR15171 and teaches Clang how to call other tools with response files, when the command line exceeds system limits. This is a problem for Windows systems, whose maximum command-line length is 32kb. I introduce the concept of "response file support" for each Tool object. A given Tool may have full support for response files (e.g. MSVC's link.exe) or only support file names inside response files, but no flags (e.g. Apple's ld64, as commented in PR15171), or no support at all (the default case). Therefore, if you implement a toolchain in the clang driver and you want clang to be able to use response files in your tools, you must override a method (getReponseFileSupport()) to tell so. I designed it to support different kinds of tools and internationalisation needs: - VS response files ( UTF-16 ) - GNU tools ( uses system's current code page, windows' legacy intl. support, with escaped backslashes. On unix, fallback to UTF-8 ) - Clang itself ( UTF-16 on windows, UTF-8 on unix ) - ld64 response files ( only a limited file list, UTF-8 on unix ) With this design, I was able to test input file names with spaces and international characters for Windows. When the linker input is large enough, it creates a response file with the correct encoding. On a Mac, to test ld64, I temporarily changed Clang's behavior to always use response files regardless of the command size limit (avoiding using huge command line inputs). I tested clang with the LLVM test suite (compiling benchmarks) and it did fine. Test Plan: A LIT test that tests proper response files support. This is tricky, since, for Unix systems, we need a 2MB response file, otherwise Clang will simply use regular arguments instead of a response file. To do this, my LIT test generate the file on the fly by cloning many -DTEST parameters until we have a 2MB file. I found out that processing 2MB of arguments is pretty slow, it takes 1 minute using my notebook in a debug build, or 10s in a Release build. Therefore, I also added "REQUIRES: long_tests", so it will only run when the user wants to run long tests. In the full discussion in http://lists.cs.uiuc.edu/pipermail/llvm-commits/Week-of-Mon-20130408/171463.html, Rafael Espindola discusses a proper way to test llvm::sys::argumentsFitWithinSystemLimits(), and, there, Chandler suggests to use 10 times the current system limit (20MB resp file), so we guarantee that the system will always use response file, even if a new linux comes up that can handle a few more bytes of arguments. However, by testing with a 20MB resp file, the test takes long 8 minutes just to perform a silly check to see if the driver will use a response file. I found it to be unreasonable. Thus, I discarded this approach and uses a 2MB response file, which should be enough. Reviewers: asl, rafael, silvas Reviewed By: silvas Subscribers: silvas, rnk, thakis, cfe-commits Differential Revision: http://reviews.llvm.org/D4897 llvm-svn: 217792
2014-09-16 01:45:39 +08:00
// Set up response file names for each command, if necessary
for (auto &Job : C.getJobs())
setUpResponseFiles(C, Job);
Teach Clang how to use response files when calling other tools Patch by Rafael Auler! This patch addresses PR15171 and teaches Clang how to call other tools with response files, when the command line exceeds system limits. This is a problem for Windows systems, whose maximum command-line length is 32kb. I introduce the concept of "response file support" for each Tool object. A given Tool may have full support for response files (e.g. MSVC's link.exe) or only support file names inside response files, but no flags (e.g. Apple's ld64, as commented in PR15171), or no support at all (the default case). Therefore, if you implement a toolchain in the clang driver and you want clang to be able to use response files in your tools, you must override a method (getReponseFileSupport()) to tell so. I designed it to support different kinds of tools and internationalisation needs: - VS response files ( UTF-16 ) - GNU tools ( uses system's current code page, windows' legacy intl. support, with escaped backslashes. On unix, fallback to UTF-8 ) - Clang itself ( UTF-16 on windows, UTF-8 on unix ) - ld64 response files ( only a limited file list, UTF-8 on unix ) With this design, I was able to test input file names with spaces and international characters for Windows. When the linker input is large enough, it creates a response file with the correct encoding. On a Mac, to test ld64, I temporarily changed Clang's behavior to always use response files regardless of the command size limit (avoiding using huge command line inputs). I tested clang with the LLVM test suite (compiling benchmarks) and it did fine. Test Plan: A LIT test that tests proper response files support. This is tricky, since, for Unix systems, we need a 2MB response file, otherwise Clang will simply use regular arguments instead of a response file. To do this, my LIT test generate the file on the fly by cloning many -DTEST parameters until we have a 2MB file. I found out that processing 2MB of arguments is pretty slow, it takes 1 minute using my notebook in a debug build, or 10s in a Release build. Therefore, I also added "REQUIRES: long_tests", so it will only run when the user wants to run long tests. In the full discussion in http://lists.cs.uiuc.edu/pipermail/llvm-commits/Week-of-Mon-20130408/171463.html, Rafael Espindola discusses a proper way to test llvm::sys::argumentsFitWithinSystemLimits(), and, there, Chandler suggests to use 10 times the current system limit (20MB resp file), so we guarantee that the system will always use response file, even if a new linux comes up that can handle a few more bytes of arguments. However, by testing with a 20MB resp file, the test takes long 8 minutes just to perform a silly check to see if the driver will use a response file. I found it to be unreasonable. Thus, I discarded this approach and uses a 2MB response file, which should be enough. Reviewers: asl, rafael, silvas Reviewed By: silvas Subscribers: silvas, rnk, thakis, cfe-commits Differential Revision: http://reviews.llvm.org/D4897 llvm-svn: 217792
2014-09-16 01:45:39 +08:00
C.ExecuteJobs(C.getJobs(), FailingCommands);
// Remove temp files.
C.CleanupFileList(C.getTempFiles());
// If the command succeeded, we are done.
if (FailingCommands.empty())
return 0;
// Otherwise, remove result files and print extra information about abnormal
// failures.
for (const auto &CmdPair : FailingCommands) {
int Res = CmdPair.first;
const Command *FailingCommand = CmdPair.second;
// Remove result files if we're not saving temps.
if (!isSaveTempsEnabled()) {
const JobAction *JA = cast<JobAction>(&FailingCommand->getSource());
C.CleanupFileMap(C.getResultFiles(), JA, true);
// Failure result files are valid unless we crashed.
if (Res < 0)
C.CleanupFileMap(C.getFailureResultFiles(), JA, true);
}
// Print extra information about abnormal failures, if possible.
//
// This is ad-hoc, but we don't want to be excessively noisy. If the result
// status was 1, assume the command failed normally. In particular, if it
// was the compiler then assume it gave a reasonable error code. Failures
// in other tools are less common, and they generally have worse
// diagnostics, so always print the diagnostic there.
const Tool &FailingTool = FailingCommand->getCreator();
if (!FailingCommand->getCreator().hasGoodDiagnostics() || Res != 1) {
// FIXME: See FIXME above regarding result code interpretation.
if (Res < 0)
Diag(clang::diag::err_drv_command_signalled)
<< FailingTool.getShortName();
else
Diag(clang::diag::err_drv_command_failed) << FailingTool.getShortName()
<< Res;
}
}
return 0;
}
void Driver::PrintHelp(bool ShowHidden) const {
unsigned IncludedFlagsBitmask;
unsigned ExcludedFlagsBitmask;
std::tie(IncludedFlagsBitmask, ExcludedFlagsBitmask) =
getIncludeExcludeOptionFlagMasks();
ExcludedFlagsBitmask |= options::NoDriverOption;
if (!ShowHidden)
ExcludedFlagsBitmask |= HelpHidden;
getOpts().PrintHelp(llvm::outs(), Name.c_str(), DriverTitle.c_str(),
IncludedFlagsBitmask, ExcludedFlagsBitmask);
}
void Driver::PrintVersion(const Compilation &C, raw_ostream &OS) const {
// FIXME: The following handlers should use a callback mechanism, we don't
// know what the client would like to do.
OS << getClangFullVersion() << '\n';
const ToolChain &TC = C.getDefaultToolChain();
OS << "Target: " << TC.getTripleString() << '\n';
// Print the threading model.
if (Arg *A = C.getArgs().getLastArg(options::OPT_mthread_model)) {
// Don't print if the ToolChain would have barfed on it already
if (TC.isThreadModelSupported(A->getValue()))
OS << "Thread model: " << A->getValue();
} else
OS << "Thread model: " << TC.getThreadModel();
OS << '\n';
// Print out the install directory.
OS << "InstalledDir: " << InstalledDir << '\n';
}
/// PrintDiagnosticCategories - Implement the --print-diagnostic-categories
/// option.
static void PrintDiagnosticCategories(raw_ostream &OS) {
// Skip the empty category.
for (unsigned i = 1, max = DiagnosticIDs::getNumberOfCategories(); i != max;
++i)
OS << i << ',' << DiagnosticIDs::getCategoryNameFromID(i) << '\n';
}
bool Driver::HandleImmediateArgs(const Compilation &C) {
// The order these options are handled in gcc is all over the place, but we
// don't expect inconsistencies w.r.t. that to matter in practice.
if (C.getArgs().hasArg(options::OPT_dumpmachine)) {
llvm::outs() << C.getDefaultToolChain().getTripleString() << '\n';
return false;
}
if (C.getArgs().hasArg(options::OPT_dumpversion)) {
// Since -dumpversion is only implemented for pedantic GCC compatibility, we
// return an answer which matches our definition of __VERSION__.
//
// If we want to return a more correct answer some day, then we should
// introduce a non-pedantically GCC compatible mode to Clang in which we
// provide sensible definitions for -dumpversion, __VERSION__, etc.
llvm::outs() << "4.2.1\n";
return false;
}
if (C.getArgs().hasArg(options::OPT__print_diagnostic_categories)) {
PrintDiagnosticCategories(llvm::outs());
return false;
}
if (C.getArgs().hasArg(options::OPT_help) ||
C.getArgs().hasArg(options::OPT__help_hidden)) {
PrintHelp(C.getArgs().hasArg(options::OPT__help_hidden));
return false;
}
if (C.getArgs().hasArg(options::OPT__version)) {
// Follow gcc behavior and use stdout for --version and stderr for -v.
PrintVersion(C, llvm::outs());
return false;
}
if (C.getArgs().hasArg(options::OPT_v) ||
C.getArgs().hasArg(options::OPT__HASH_HASH_HASH)) {
PrintVersion(C, llvm::errs());
SuppressMissingInputWarning = true;
}
const ToolChain &TC = C.getDefaultToolChain();
if (C.getArgs().hasArg(options::OPT_v))
TC.printVerboseInfo(llvm::errs());
if (C.getArgs().hasArg(options::OPT_print_search_dirs)) {
llvm::outs() << "programs: =";
bool separator = false;
for (const std::string &Path : TC.getProgramPaths()) {
if (separator)
llvm::outs() << ':';
llvm::outs() << Path;
separator = true;
}
llvm::outs() << "\n";
llvm::outs() << "libraries: =" << ResourceDir;
2012-04-16 12:16:43 +08:00
StringRef sysroot = C.getSysRoot();
for (const std::string &Path : TC.getFilePaths()) {
// Always print a separator. ResourceDir was the first item shown.
llvm::outs() << ':';
// Interpretation of leading '=' is needed only for NetBSD.
if (Path[0] == '=')
llvm::outs() << sysroot << Path.substr(1);
else
llvm::outs() << Path;
}
llvm::outs() << "\n";
return false;
}
// FIXME: The following handlers should use a callback mechanism, we don't
// know what the client would like to do.
if (Arg *A = C.getArgs().getLastArg(options::OPT_print_file_name_EQ)) {
llvm::outs() << GetFilePath(A->getValue(), TC) << "\n";
return false;
}
if (Arg *A = C.getArgs().getLastArg(options::OPT_print_prog_name_EQ)) {
llvm::outs() << GetProgramPath(A->getValue(), TC) << "\n";
return false;
}
if (C.getArgs().hasArg(options::OPT_print_libgcc_file_name)) {
ToolChain::RuntimeLibType RLT = TC.GetRuntimeLibType(C.getArgs());
switch (RLT) {
case ToolChain::RLT_CompilerRT:
llvm::outs() << TC.getCompilerRT(C.getArgs(), "builtins") << "\n";
break;
case ToolChain::RLT_Libgcc:
llvm::outs() << GetFilePath("libgcc.a", TC) << "\n";
break;
}
return false;
}
if (C.getArgs().hasArg(options::OPT_print_multi_lib)) {
for (const Multilib &Multilib : TC.getMultilibs())
llvm::outs() << Multilib << "\n";
return false;
}
if (C.getArgs().hasArg(options::OPT_print_multi_directory)) {
for (const Multilib &Multilib : TC.getMultilibs()) {
if (Multilib.gccSuffix().empty())
llvm::outs() << ".\n";
else {
StringRef Suffix(Multilib.gccSuffix());
assert(Suffix.front() == '/');
llvm::outs() << Suffix.substr(1) << "\n";
}
}
return false;
}
return true;
}
// Display an action graph human-readably. Action A is the "sink" node
// and latest-occuring action. Traversal is in pre-order, visiting the
// inputs to each action before printing the action itself.
static unsigned PrintActions1(const Compilation &C, Action *A,
std::map<Action *, unsigned> &Ids) {
if (Ids.count(A)) // A was already visited.
return Ids[A];
std::string str;
llvm::raw_string_ostream os(str);
os << Action::getClassName(A->getKind()) << ", ";
if (InputAction *IA = dyn_cast<InputAction>(A)) {
os << "\"" << IA->getInputArg().getValue() << "\"";
} else if (BindArchAction *BIA = dyn_cast<BindArchAction>(A)) {
os << '"' << BIA->getArchName() << '"' << ", {"
<< PrintActions1(C, *BIA->input_begin(), Ids) << "}";
[CUDA][OpenMP] Create generic offload action Summary: This patch replaces the CUDA specific action by a generic offload action. The offload action may have multiple dependences classier in “host” and “device”. The way this generic offloading action is used is very similar to what is done today by the CUDA implementation: it is used to set a specific toolchain and architecture to its dependences during the generation of jobs. This patch also proposes propagating the offloading information through the action graph so that that information can be easily retrieved at any time during the generation of commands. This allows e.g. the "clang tool” to evaluate whether CUDA should be supported for the device or host and ptas to easily retrieve the target architecture. This is an example of how the action graphs would look like (compilation of a single CUDA file with two GPU architectures) ``` 0: input, "cudatests.cu", cuda, (host-cuda) 1: preprocessor, {0}, cuda-cpp-output, (host-cuda) 2: compiler, {1}, ir, (host-cuda) 3: input, "cudatests.cu", cuda, (device-cuda, sm_35) 4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_35) 5: compiler, {4}, ir, (device-cuda, sm_35) 6: backend, {5}, assembler, (device-cuda, sm_35) 7: assembler, {6}, object, (device-cuda, sm_35) 8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {7}, object 9: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {6}, assembler 10: input, "cudatests.cu", cuda, (device-cuda, sm_37) 11: preprocessor, {10}, cuda-cpp-output, (device-cuda, sm_37) 12: compiler, {11}, ir, (device-cuda, sm_37) 13: backend, {12}, assembler, (device-cuda, sm_37) 14: assembler, {13}, object, (device-cuda, sm_37) 15: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {14}, object 16: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {13}, assembler 17: linker, {8, 9, 15, 16}, cuda-fatbin, (device-cuda) 18: offload, "host-cuda (powerpc64le-unknown-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda)" {17}, ir 19: backend, {18}, assembler 20: assembler, {19}, object 21: input, "cuda", object 22: input, "cudart", object 23: linker, {20, 21, 22}, image ``` The changes in this patch pass the existent regression tests (keeps the existent functionality) and resulting binaries execute correctly in a Power8+K40 machine. Reviewers: echristo, hfinkel, jlebar, ABataev, tra Subscribers: guansong, andreybokhanko, tcramer, mkuron, cfe-commits, arpith-jacob, carlo.bertolli, caomhin Differential Revision: https://reviews.llvm.org/D18171 llvm-svn: 275645
2016-07-16 07:13:27 +08:00
} else if (OffloadAction *OA = dyn_cast<OffloadAction>(A)) {
bool IsFirst = true;
OA->doOnEachDependence(
[&](Action *A, const ToolChain *TC, const char *BoundArch) {
// E.g. for two CUDA device dependences whose bound arch is sm_20 and
// sm_35 this will generate:
// "cuda-device" (nvptx64-nvidia-cuda:sm_20) {#ID}, "cuda-device"
// (nvptx64-nvidia-cuda:sm_35) {#ID}
if (!IsFirst)
os << ", ";
os << '"';
if (TC)
os << A->getOffloadingKindPrefix();
else
os << "host";
os << " (";
os << TC->getTriple().normalize();
if (BoundArch)
os << ":" << BoundArch;
os << ")";
os << '"';
os << " {" << PrintActions1(C, A, Ids) << "}";
IsFirst = false;
});
} else {
[CUDA][OpenMP] Create generic offload action Summary: This patch replaces the CUDA specific action by a generic offload action. The offload action may have multiple dependences classier in “host” and “device”. The way this generic offloading action is used is very similar to what is done today by the CUDA implementation: it is used to set a specific toolchain and architecture to its dependences during the generation of jobs. This patch also proposes propagating the offloading information through the action graph so that that information can be easily retrieved at any time during the generation of commands. This allows e.g. the "clang tool” to evaluate whether CUDA should be supported for the device or host and ptas to easily retrieve the target architecture. This is an example of how the action graphs would look like (compilation of a single CUDA file with two GPU architectures) ``` 0: input, "cudatests.cu", cuda, (host-cuda) 1: preprocessor, {0}, cuda-cpp-output, (host-cuda) 2: compiler, {1}, ir, (host-cuda) 3: input, "cudatests.cu", cuda, (device-cuda, sm_35) 4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_35) 5: compiler, {4}, ir, (device-cuda, sm_35) 6: backend, {5}, assembler, (device-cuda, sm_35) 7: assembler, {6}, object, (device-cuda, sm_35) 8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {7}, object 9: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {6}, assembler 10: input, "cudatests.cu", cuda, (device-cuda, sm_37) 11: preprocessor, {10}, cuda-cpp-output, (device-cuda, sm_37) 12: compiler, {11}, ir, (device-cuda, sm_37) 13: backend, {12}, assembler, (device-cuda, sm_37) 14: assembler, {13}, object, (device-cuda, sm_37) 15: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {14}, object 16: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {13}, assembler 17: linker, {8, 9, 15, 16}, cuda-fatbin, (device-cuda) 18: offload, "host-cuda (powerpc64le-unknown-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda)" {17}, ir 19: backend, {18}, assembler 20: assembler, {19}, object 21: input, "cuda", object 22: input, "cudart", object 23: linker, {20, 21, 22}, image ``` The changes in this patch pass the existent regression tests (keeps the existent functionality) and resulting binaries execute correctly in a Power8+K40 machine. Reviewers: echristo, hfinkel, jlebar, ABataev, tra Subscribers: guansong, andreybokhanko, tcramer, mkuron, cfe-commits, arpith-jacob, carlo.bertolli, caomhin Differential Revision: https://reviews.llvm.org/D18171 llvm-svn: 275645
2016-07-16 07:13:27 +08:00
const ActionList *AL = &A->getInputs();
if (AL->size()) {
const char *Prefix = "{";
for (Action *PreRequisite : *AL) {
os << Prefix << PrintActions1(C, PreRequisite, Ids);
Prefix = ", ";
}
os << "}";
} else
os << "{}";
}
[CUDA][OpenMP] Create generic offload action Summary: This patch replaces the CUDA specific action by a generic offload action. The offload action may have multiple dependences classier in “host” and “device”. The way this generic offloading action is used is very similar to what is done today by the CUDA implementation: it is used to set a specific toolchain and architecture to its dependences during the generation of jobs. This patch also proposes propagating the offloading information through the action graph so that that information can be easily retrieved at any time during the generation of commands. This allows e.g. the "clang tool” to evaluate whether CUDA should be supported for the device or host and ptas to easily retrieve the target architecture. This is an example of how the action graphs would look like (compilation of a single CUDA file with two GPU architectures) ``` 0: input, "cudatests.cu", cuda, (host-cuda) 1: preprocessor, {0}, cuda-cpp-output, (host-cuda) 2: compiler, {1}, ir, (host-cuda) 3: input, "cudatests.cu", cuda, (device-cuda, sm_35) 4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_35) 5: compiler, {4}, ir, (device-cuda, sm_35) 6: backend, {5}, assembler, (device-cuda, sm_35) 7: assembler, {6}, object, (device-cuda, sm_35) 8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {7}, object 9: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {6}, assembler 10: input, "cudatests.cu", cuda, (device-cuda, sm_37) 11: preprocessor, {10}, cuda-cpp-output, (device-cuda, sm_37) 12: compiler, {11}, ir, (device-cuda, sm_37) 13: backend, {12}, assembler, (device-cuda, sm_37) 14: assembler, {13}, object, (device-cuda, sm_37) 15: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {14}, object 16: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {13}, assembler 17: linker, {8, 9, 15, 16}, cuda-fatbin, (device-cuda) 18: offload, "host-cuda (powerpc64le-unknown-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda)" {17}, ir 19: backend, {18}, assembler 20: assembler, {19}, object 21: input, "cuda", object 22: input, "cudart", object 23: linker, {20, 21, 22}, image ``` The changes in this patch pass the existent regression tests (keeps the existent functionality) and resulting binaries execute correctly in a Power8+K40 machine. Reviewers: echristo, hfinkel, jlebar, ABataev, tra Subscribers: guansong, andreybokhanko, tcramer, mkuron, cfe-commits, arpith-jacob, carlo.bertolli, caomhin Differential Revision: https://reviews.llvm.org/D18171 llvm-svn: 275645
2016-07-16 07:13:27 +08:00
// Append offload info for all options other than the offloading action
// itself (e.g. (cuda-device, sm_20) or (cuda-host)).
std::string offload_str;
llvm::raw_string_ostream offload_os(offload_str);
if (!isa<OffloadAction>(A)) {
auto S = A->getOffloadingKindPrefix();
if (!S.empty()) {
offload_os << ", (" << S;
if (A->getOffloadingArch())
offload_os << ", " << A->getOffloadingArch();
offload_os << ")";
}
}
unsigned Id = Ids.size();
Ids[A] = Id;
llvm::errs() << Id << ": " << os.str() << ", "
[CUDA][OpenMP] Create generic offload action Summary: This patch replaces the CUDA specific action by a generic offload action. The offload action may have multiple dependences classier in “host” and “device”. The way this generic offloading action is used is very similar to what is done today by the CUDA implementation: it is used to set a specific toolchain and architecture to its dependences during the generation of jobs. This patch also proposes propagating the offloading information through the action graph so that that information can be easily retrieved at any time during the generation of commands. This allows e.g. the "clang tool” to evaluate whether CUDA should be supported for the device or host and ptas to easily retrieve the target architecture. This is an example of how the action graphs would look like (compilation of a single CUDA file with two GPU architectures) ``` 0: input, "cudatests.cu", cuda, (host-cuda) 1: preprocessor, {0}, cuda-cpp-output, (host-cuda) 2: compiler, {1}, ir, (host-cuda) 3: input, "cudatests.cu", cuda, (device-cuda, sm_35) 4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_35) 5: compiler, {4}, ir, (device-cuda, sm_35) 6: backend, {5}, assembler, (device-cuda, sm_35) 7: assembler, {6}, object, (device-cuda, sm_35) 8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {7}, object 9: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {6}, assembler 10: input, "cudatests.cu", cuda, (device-cuda, sm_37) 11: preprocessor, {10}, cuda-cpp-output, (device-cuda, sm_37) 12: compiler, {11}, ir, (device-cuda, sm_37) 13: backend, {12}, assembler, (device-cuda, sm_37) 14: assembler, {13}, object, (device-cuda, sm_37) 15: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {14}, object 16: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {13}, assembler 17: linker, {8, 9, 15, 16}, cuda-fatbin, (device-cuda) 18: offload, "host-cuda (powerpc64le-unknown-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda)" {17}, ir 19: backend, {18}, assembler 20: assembler, {19}, object 21: input, "cuda", object 22: input, "cudart", object 23: linker, {20, 21, 22}, image ``` The changes in this patch pass the existent regression tests (keeps the existent functionality) and resulting binaries execute correctly in a Power8+K40 machine. Reviewers: echristo, hfinkel, jlebar, ABataev, tra Subscribers: guansong, andreybokhanko, tcramer, mkuron, cfe-commits, arpith-jacob, carlo.bertolli, caomhin Differential Revision: https://reviews.llvm.org/D18171 llvm-svn: 275645
2016-07-16 07:13:27 +08:00
<< types::getTypeName(A->getType()) << offload_os.str() << "\n";
return Id;
}
// Print the action graphs in a compilation C.
// For example "clang -c file1.c file2.c" is composed of two subgraphs.
void Driver::PrintActions(const Compilation &C) const {
std::map<Action *, unsigned> Ids;
for (Action *A : C.getActions())
PrintActions1(C, A, Ids);
}
/// \brief Check whether the given input tree contains any compilation or
/// assembly actions.
static bool ContainsCompileOrAssembleAction(const Action *A) {
if (isa<CompileJobAction>(A) || isa<BackendJobAction>(A) ||
isa<AssembleJobAction>(A))
return true;
for (const Action *Input : A->inputs())
if (ContainsCompileOrAssembleAction(Input))
return true;
return false;
}
void Driver::BuildUniversalActions(Compilation &C, const ToolChain &TC,
const InputList &BAInputs) const {
DerivedArgList &Args = C.getArgs();
ActionList &Actions = C.getActions();
llvm::PrettyStackTraceString CrashInfo("Building universal build actions");
// Collect the list of architectures. Duplicates are allowed, but should only
// be handled once (in the order seen).
llvm::StringSet<> ArchNames;
SmallVector<const char *, 4> Archs;
for (Arg *A : Args) {
if (A->getOption().matches(options::OPT_arch)) {
// Validate the option here; we don't save the type here because its
// particular spelling may participate in other driver choices.
llvm::Triple::ArchType Arch =
tools::darwin::getArchTypeForMachOArchName(A->getValue());
if (Arch == llvm::Triple::UnknownArch) {
Diag(clang::diag::err_drv_invalid_arch_name) << A->getAsString(Args);
continue;
}
A->claim();
if (ArchNames.insert(A->getValue()).second)
Archs.push_back(A->getValue());
}
}
// When there is no explicit arch for this platform, make sure we still bind
// the architecture (to the default) so that -Xarch_ is handled correctly.
if (!Archs.size())
Archs.push_back(Args.MakeArgString(TC.getDefaultUniversalArchName()));
ActionList SingleActions;
BuildActions(C, Args, BAInputs, SingleActions);
// Add in arch bindings for every top level action, as well as lipo and
// dsymutil steps if needed.
for (Action* Act : SingleActions) {
// Make sure we can lipo this kind of output. If not (and it is an actual
// output) then we disallow, since we can't create an output file with the
// right name without overwriting it. We could remove this oddity by just
// changing the output names to include the arch, which would also fix
// -save-temps. Compatibility wins for now.
if (Archs.size() > 1 && !types::canLipoType(Act->getType()))
Diag(clang::diag::err_drv_invalid_output_with_multiple_archs)
<< types::getTypeName(Act->getType());
ActionList Inputs;
for (unsigned i = 0, e = Archs.size(); i != e; ++i)
Inputs.push_back(C.MakeAction<BindArchAction>(Act, Archs[i]));
// Lipo if necessary, we do it this way because we need to set the arch flag
// so that -Xarch_ gets overwritten.
if (Inputs.size() == 1 || Act->getType() == types::TY_Nothing)
Actions.append(Inputs.begin(), Inputs.end());
else
Actions.push_back(C.MakeAction<LipoJobAction>(Inputs, Act->getType()));
// Handle debug info queries.
Arg *A = Args.getLastArg(options::OPT_g_Group);
2012-04-16 05:22:10 +08:00
if (A && !A->getOption().matches(options::OPT_g0) &&
!A->getOption().matches(options::OPT_gstabs) &&
ContainsCompileOrAssembleAction(Actions.back())) {
2012-04-16 05:22:10 +08:00
// Add a 'dsymutil' step if necessary, when debug info is enabled and we
// have a compile input. We need to run 'dsymutil' ourselves in such cases
2013-01-29 01:39:03 +08:00
// because the debug info will refer to a temporary object file which
2012-04-16 05:22:10 +08:00
// will be removed at the end of the compilation process.
if (Act->getType() == types::TY_Image) {
ActionList Inputs;
Inputs.push_back(Actions.back());
Actions.pop_back();
Actions.push_back(
C.MakeAction<DsymutilJobAction>(Inputs, types::TY_dSYM));
}
2012-04-16 05:22:10 +08:00
// Verify the debug info output.
if (Args.hasArg(options::OPT_verify_debug_info)) {
Action* LastAction = Actions.back();
2012-04-16 05:22:10 +08:00
Actions.pop_back();
Actions.push_back(C.MakeAction<VerifyDebugInfoJobAction>(
LastAction, types::TY_Nothing));
2012-04-16 05:22:10 +08:00
}
}
}
}
/// \brief Check that the file referenced by Value exists. If it doesn't,
/// issue a diagnostic and return false.
static bool DiagnoseInputExistence(const Driver &D, const DerivedArgList &Args,
StringRef Value, types::ID Ty) {
if (!D.getCheckInputsExist())
return true;
// stdin always exists.
if (Value == "-")
return true;
SmallString<64> Path(Value);
if (Arg *WorkDir = Args.getLastArg(options::OPT_working_directory)) {
if (!llvm::sys::path::is_absolute(Path)) {
SmallString<64> Directory(WorkDir->getValue());
llvm::sys::path::append(Directory, Value);
Path.assign(Directory);
}
}
if (llvm::sys::fs::exists(Twine(Path)))
return true;
if (D.IsCLMode()) {
if (!llvm::sys::path::is_absolute(Twine(Path)) &&
llvm::sys::Process::FindInEnvPath("LIB", Value))
return true;
if (Args.hasArg(options::OPT__SLASH_link) && Ty == types::TY_Object) {
// Arguments to the /link flag might cause the linker to search for object
// and library files in paths we don't know about. Don't error in such
// cases.
return true;
}
}
D.Diag(clang::diag::err_drv_no_such_file) << Path;
return false;
}
// Construct a the list of inputs and their types.
void Driver::BuildInputs(const ToolChain &TC, DerivedArgList &Args,
InputList &Inputs) const {
// Track the current user specified (-x) input. We also explicitly track the
// argument used to set the type; we only want to claim the type when we
// actually use it, so we warn about unused -x arguments.
types::ID InputType = types::TY_Nothing;
Arg *InputTypeArg = nullptr;
// The last /TC or /TP option sets the input type to C or C++ globally.
if (Arg *TCTP = Args.getLastArgNoClaim(options::OPT__SLASH_TC,
options::OPT__SLASH_TP)) {
InputTypeArg = TCTP;
InputType = TCTP->getOption().matches(options::OPT__SLASH_TC)
? types::TY_C
: types::TY_CXX;
Arg *Previous = nullptr;
bool ShowNote = false;
for (Arg *A : Args.filtered(options::OPT__SLASH_TC, options::OPT__SLASH_TP)) {
if (Previous) {
Diag(clang::diag::warn_drv_overriding_flag_option)
<< Previous->getSpelling() << A->getSpelling();
ShowNote = true;
}
Previous = A;
}
if (ShowNote)
Diag(clang::diag::note_drv_t_option_is_global);
// No driver mode exposes -x and /TC or /TP; we don't support mixing them.
assert(!Args.hasArg(options::OPT_x) && "-x and /TC or /TP is not allowed");
}
for (Arg *A : Args) {
if (A->getOption().getKind() == Option::InputClass) {
const char *Value = A->getValue();
types::ID Ty = types::TY_INVALID;
// Infer the input type if necessary.
if (InputType == types::TY_Nothing) {
// If there was an explicit arg for this, claim it.
if (InputTypeArg)
InputTypeArg->claim();
// stdin must be handled specially.
if (memcmp(Value, "-", 2) == 0) {
// If running with -E, treat as a C input (this changes the builtin
// macros, for example). This may be overridden by -ObjC below.
//
// Otherwise emit an error but still use a valid type to avoid
// spurious errors (e.g., no inputs).
if (!Args.hasArgNoClaim(options::OPT_E) && !CCCIsCPP())
Diag(IsCLMode() ? clang::diag::err_drv_unknown_stdin_type_clang_cl
: clang::diag::err_drv_unknown_stdin_type);
Ty = types::TY_C;
} else {
// Otherwise lookup by extension.
// Fallback is C if invoked as C preprocessor or Object otherwise.
// We use a host hook here because Darwin at least has its own
// idea of what .s is.
if (const char *Ext = strrchr(Value, '.'))
Ty = TC.LookupTypeForExtension(Ext + 1);
if (Ty == types::TY_INVALID) {
if (CCCIsCPP())
Ty = types::TY_C;
else
Ty = types::TY_Object;
}
// If the driver is invoked as C++ compiler (like clang++ or c++) it
// should autodetect some input files as C++ for g++ compatibility.
if (CCCIsCXX()) {
types::ID OldTy = Ty;
Ty = types::lookupCXXTypeForCType(Ty);
if (Ty != OldTy)
Diag(clang::diag::warn_drv_treating_input_as_cxx)
<< getTypeName(OldTy) << getTypeName(Ty);
}
}
2009-05-19 05:47:54 +08:00
// -ObjC and -ObjC++ override the default language, but only for "source
// files". We just treat everything that isn't a linker input as a
// source file.
//
2009-05-19 05:47:54 +08:00
// FIXME: Clean this up if we move the phase sequence into the type.
if (Ty != types::TY_Object) {
if (Args.hasArg(options::OPT_ObjC))
Ty = types::TY_ObjC;
else if (Args.hasArg(options::OPT_ObjCXX))
Ty = types::TY_ObjCXX;
}
} else {
assert(InputTypeArg && "InputType set w/o InputTypeArg");
if (!InputTypeArg->getOption().matches(options::OPT_x)) {
// If emulating cl.exe, make sure that /TC and /TP don't affect input
// object files.
const char *Ext = strrchr(Value, '.');
if (Ext && TC.LookupTypeForExtension(Ext + 1) == types::TY_Object)
Ty = types::TY_Object;
}
if (Ty == types::TY_INVALID) {
Ty = InputType;
InputTypeArg->claim();
}
}
if (DiagnoseInputExistence(*this, Args, Value, Ty))
Inputs.push_back(std::make_pair(Ty, A));
} else if (A->getOption().matches(options::OPT__SLASH_Tc)) {
StringRef Value = A->getValue();
if (DiagnoseInputExistence(*this, Args, Value, types::TY_C)) {
Arg *InputArg = MakeInputArg(Args, *Opts, A->getValue());
Inputs.push_back(std::make_pair(types::TY_C, InputArg));
}
A->claim();
} else if (A->getOption().matches(options::OPT__SLASH_Tp)) {
StringRef Value = A->getValue();
if (DiagnoseInputExistence(*this, Args, Value, types::TY_CXX)) {
Arg *InputArg = MakeInputArg(Args, *Opts, A->getValue());
Inputs.push_back(std::make_pair(types::TY_CXX, InputArg));
}
A->claim();
} else if (A->getOption().hasFlag(options::LinkerInput)) {
// Just treat as object type, we could make a special type for this if
// necessary.
Inputs.push_back(std::make_pair(types::TY_Object, A));
} else if (A->getOption().matches(options::OPT_x)) {
InputTypeArg = A;
InputType = types::lookupTypeForTypeSpecifier(A->getValue());
A->claim();
// Follow gcc behavior and treat as linker input for invalid -x
// options. Its not clear why we shouldn't just revert to unknown; but
2010-12-18 05:22:33 +08:00
// this isn't very important, we might as well be bug compatible.
if (!InputType) {
Diag(clang::diag::err_drv_unknown_language) << A->getValue();
InputType = types::TY_Object;
}
} else if (A->getOption().getID() == options::OPT__SLASH_U) {
assert(A->getNumValues() == 1 && "The /U option has one value.");
StringRef Val = A->getValue(0);
if (Val.find_first_of("/\\") != StringRef::npos) {
// Warn about e.g. "/Users/me/myfile.c".
Diag(diag::warn_slash_u_filename) << Val;
Diag(diag::note_use_dashdash);
}
}
}
if (CCCIsCPP() && Inputs.empty()) {
// If called as standalone preprocessor, stdin is processed
// if no other input is present.
Arg *A = MakeInputArg(Args, *Opts, "-");
Inputs.push_back(std::make_pair(types::TY_C, A));
}
}
namespace {
/// Provides a convenient interface for different programming models to generate
/// the required device actions.
class OffloadingActionBuilder final {
/// Flag used to trace errors in the builder.
bool IsValid = false;
/// The compilation that is using this builder.
Compilation &C;
/// Map between an input argument and the offload kinds used to process it.
std::map<const Arg *, unsigned> InputArgToOffloadKindMap;
/// Builder interface. It doesn't build anything or keep any state.
class DeviceActionBuilder {
public:
typedef llvm::SmallVector<phases::ID, phases::MaxNumberOfPhases> PhasesTy;
enum ActionBuilderReturnCode {
// The builder acted successfully on the current action.
ABRT_Success,
// The builder didn't have to act on the current action.
ABRT_Inactive,
// The builder was successful and requested the host action to not be
// generated.
ABRT_Ignore_Host,
};
protected:
/// Compilation associated with this builder.
Compilation &C;
/// Tool chains associated with this builder. The same programming
/// model may have associated one or more tool chains.
SmallVector<const ToolChain *, 2> ToolChains;
/// The derived arguments associated with this builder.
DerivedArgList &Args;
/// The inputs associated with this builder.
const Driver::InputList &Inputs;
/// The associated offload kind.
Action::OffloadKind AssociatedOffloadKind = Action::OFK_None;
public:
DeviceActionBuilder(Compilation &C, DerivedArgList &Args,
const Driver::InputList &Inputs,
Action::OffloadKind AssociatedOffloadKind)
: C(C), Args(Args), Inputs(Inputs),
AssociatedOffloadKind(AssociatedOffloadKind) {}
virtual ~DeviceActionBuilder() {}
/// Fill up the array \a DA with all the device dependences that should be
/// added to the provided host action \a HostAction. By default it is
/// inactive.
virtual ActionBuilderReturnCode
getDeviceDependences(OffloadAction::DeviceDependences &DA,
phases::ID CurPhase, phases::ID FinalPhase,
PhasesTy &Phases) {
return ABRT_Inactive;
}
/// Update the state to include the provided host action \a HostAction as a
/// dependency of the current device action. By default it is inactive.
virtual ActionBuilderReturnCode addDeviceDepences(Action *HostAction) {
return ABRT_Inactive;
}
/// Append top level actions generated by the builder. Return true if errors
/// were found.
virtual void appendTopLevelActions(ActionList &AL) {}
/// Append linker actions generated by the builder. Return true if errors
/// were found.
virtual void appendLinkDependences(OffloadAction::DeviceDependences &DA) {}
/// Initialize the builder. Return true if any initialization errors are
/// found.
virtual bool initialize() { return false; }
/// Return true if the builder can use bundling/unbundling.
virtual bool canUseBundlerUnbundler() const { return false; }
/// Return true if this builder is valid. We have a valid builder if we have
/// associated device tool chains.
bool isValid() { return !ToolChains.empty(); }
/// Return the associated offload kind.
Action::OffloadKind getAssociatedOffloadKind() {
return AssociatedOffloadKind;
}
};
/// \brief CUDA action builder. It injects device code in the host backend
/// action.
class CudaActionBuilder final : public DeviceActionBuilder {
/// Flags to signal if the user requested host-only or device-only
/// compilation.
bool CompileHostOnly = false;
bool CompileDeviceOnly = false;
/// List of GPU architectures to use in this compilation.
SmallVector<CudaArch, 4> GpuArchList;
/// The CUDA actions for the current input.
ActionList CudaDeviceActions;
/// The CUDA fat binary if it was generated for the current input.
Action *CudaFatBinary = nullptr;
/// Flag that is set to true if this builder acted on the current input.
bool IsActive = false;
public:
CudaActionBuilder(Compilation &C, DerivedArgList &Args,
const Driver::InputList &Inputs)
: DeviceActionBuilder(C, Args, Inputs, Action::OFK_Cuda) {}
ActionBuilderReturnCode
getDeviceDependences(OffloadAction::DeviceDependences &DA,
phases::ID CurPhase, phases::ID FinalPhase,
PhasesTy &Phases) override {
if (!IsActive)
return ABRT_Inactive;
// If we don't have more CUDA actions, we don't have any dependences to
// create for the host.
if (CudaDeviceActions.empty())
return ABRT_Success;
assert(CudaDeviceActions.size() == GpuArchList.size() &&
"Expecting one action per GPU architecture.");
assert(!CompileHostOnly &&
"Not expecting CUDA actions in host-only compilation.");
// If we are generating code for the device or we are in a backend phase,
// we attempt to generate the fat binary. We compile each arch to ptx and
// assemble to cubin, then feed the cubin *and* the ptx into a device
// "link" action, which uses fatbinary to combine these cubins into one
// fatbin. The fatbin is then an input to the host action if not in
// device-only mode.
if (CompileDeviceOnly || CurPhase == phases::Backend) {
ActionList DeviceActions;
for (unsigned I = 0, E = GpuArchList.size(); I != E; ++I) {
// Produce the device action from the current phase up to the assemble
// phase.
for (auto Ph : Phases) {
// Skip the phases that were already dealt with.
if (Ph < CurPhase)
continue;
// We have to be consistent with the host final phase.
if (Ph > FinalPhase)
break;
CudaDeviceActions[I] = C.getDriver().ConstructPhaseAction(
C, Args, Ph, CudaDeviceActions[I]);
if (Ph == phases::Assemble)
break;
}
// If we didn't reach the assemble phase, we can't generate the fat
// binary. We don't need to generate the fat binary if we are not in
// device-only mode.
if (!isa<AssembleJobAction>(CudaDeviceActions[I]) ||
CompileDeviceOnly)
continue;
Action *AssembleAction = CudaDeviceActions[I];
assert(AssembleAction->getType() == types::TY_Object);
assert(AssembleAction->getInputs().size() == 1);
Action *BackendAction = AssembleAction->getInputs()[0];
assert(BackendAction->getType() == types::TY_PP_Asm);
for (auto &A : {AssembleAction, BackendAction}) {
OffloadAction::DeviceDependences DDep;
DDep.add(*A, *ToolChains.front(), CudaArchToString(GpuArchList[I]),
Action::OFK_Cuda);
DeviceActions.push_back(
C.MakeAction<OffloadAction>(DDep, A->getType()));
}
}
// We generate the fat binary if we have device input actions.
if (!DeviceActions.empty()) {
CudaFatBinary =
C.MakeAction<LinkJobAction>(DeviceActions, types::TY_CUDA_FATBIN);
if (!CompileDeviceOnly) {
DA.add(*CudaFatBinary, *ToolChains.front(), /*BoundArch=*/nullptr,
Action::OFK_Cuda);
// Clear the fat binary, it is already a dependence to an host
// action.
CudaFatBinary = nullptr;
}
// Remove the CUDA actions as they are already connected to an host
// action or fat binary.
CudaDeviceActions.clear();
}
// We avoid creating host action in device-only mode.
return CompileDeviceOnly ? ABRT_Ignore_Host : ABRT_Success;
} else if (CurPhase > phases::Backend) {
// If we are past the backend phase and still have a device action, we
// don't have to do anything as this action is already a device
// top-level action.
return ABRT_Success;
}
assert(CurPhase < phases::Backend && "Generating single CUDA "
"instructions should only occur "
"before the backend phase!");
// By default, we produce an action for each device arch.
for (Action *&A : CudaDeviceActions)
A = C.getDriver().ConstructPhaseAction(C, Args, CurPhase, A);
return ABRT_Success;
}
ActionBuilderReturnCode addDeviceDepences(Action *HostAction) override {
// While generating code for CUDA, we only depend on the host input action
// to trigger the creation of all the CUDA device actions.
// If we are dealing with an input action, replicate it for each GPU
// architecture. If we are in host-only mode we return 'success' so that
// the host uses the CUDA offload kind.
if (auto *IA = dyn_cast<InputAction>(HostAction)) {
assert(!GpuArchList.empty() &&
"We should have at least one GPU architecture.");
// If the host input is not CUDA, we don't need to bother about this
// input.
if (IA->getType() != types::TY_CUDA) {
// The builder will ignore this input.
IsActive = false;
return ABRT_Inactive;
}
// Set the flag to true, so that the builder acts on the current input.
IsActive = true;
if (CompileHostOnly)
return ABRT_Success;
// Replicate inputs for each GPU architecture.
for (unsigned I = 0, E = GpuArchList.size(); I != E; ++I)
CudaDeviceActions.push_back(C.MakeAction<InputAction>(
IA->getInputArg(), types::TY_CUDA_DEVICE));
return ABRT_Success;
}
return IsActive ? ABRT_Success : ABRT_Inactive;
}
void appendTopLevelActions(ActionList &AL) override {
// Utility to append actions to the top level list.
auto AddTopLevel = [&](Action *A, CudaArch BoundArch) {
OffloadAction::DeviceDependences Dep;
Dep.add(*A, *ToolChains.front(), CudaArchToString(BoundArch),
Action::OFK_Cuda);
AL.push_back(C.MakeAction<OffloadAction>(Dep, A->getType()));
};
// If we have a fat binary, add it to the list.
if (CudaFatBinary) {
AddTopLevel(CudaFatBinary, CudaArch::UNKNOWN);
CudaDeviceActions.clear();
CudaFatBinary = nullptr;
return;
}
if (CudaDeviceActions.empty())
return;
// If we have CUDA actions at this point, that's because we have a have
// partial compilation, so we should have an action for each GPU
// architecture.
assert(CudaDeviceActions.size() == GpuArchList.size() &&
"Expecting one action per GPU architecture.");
assert(ToolChains.size() == 1 &&
"Expecting to have a sing CUDA toolchain.");
for (unsigned I = 0, E = GpuArchList.size(); I != E; ++I)
AddTopLevel(CudaDeviceActions[I], GpuArchList[I]);
CudaDeviceActions.clear();
}
bool initialize() override {
// We don't need to support CUDA.
if (!C.hasOffloadToolChain<Action::OFK_Cuda>())
return false;
const ToolChain *HostTC = C.getSingleOffloadToolChain<Action::OFK_Host>();
assert(HostTC && "No toolchain for host compilation.");
if (HostTC->getTriple().isNVPTX()) {
// We do not support targeting NVPTX for host compilation. Throw
// an error and abort pipeline construction early so we don't trip
// asserts that assume device-side compilation.
C.getDriver().Diag(diag::err_drv_cuda_nvptx_host);
return true;
}
ToolChains.push_back(C.getSingleOffloadToolChain<Action::OFK_Cuda>());
Arg *PartialCompilationArg = Args.getLastArg(
options::OPT_cuda_host_only, options::OPT_cuda_device_only,
options::OPT_cuda_compile_host_device);
CompileHostOnly = PartialCompilationArg &&
PartialCompilationArg->getOption().matches(
options::OPT_cuda_host_only);
CompileDeviceOnly = PartialCompilationArg &&
PartialCompilationArg->getOption().matches(
options::OPT_cuda_device_only);
// Collect all cuda_gpu_arch parameters, removing duplicates.
std::set<CudaArch> GpuArchs;
bool Error = false;
for (Arg *A : Args) {
if (!(A->getOption().matches(options::OPT_cuda_gpu_arch_EQ) ||
A->getOption().matches(options::OPT_no_cuda_gpu_arch_EQ)))
continue;
A->claim();
const StringRef ArchStr = A->getValue();
if (A->getOption().matches(options::OPT_no_cuda_gpu_arch_EQ) &&
ArchStr == "all") {
GpuArchs.clear();
continue;
}
CudaArch Arch = StringToCudaArch(ArchStr);
if (Arch == CudaArch::UNKNOWN) {
C.getDriver().Diag(clang::diag::err_drv_cuda_bad_gpu_arch) << ArchStr;
Error = true;
} else if (A->getOption().matches(options::OPT_cuda_gpu_arch_EQ))
GpuArchs.insert(Arch);
else if (A->getOption().matches(options::OPT_no_cuda_gpu_arch_EQ))
GpuArchs.erase(Arch);
else
llvm_unreachable("Unexpected option.");
}
// Collect list of GPUs remaining in the set.
for (CudaArch Arch : GpuArchs)
GpuArchList.push_back(Arch);
// Default to sm_20 which is the lowest common denominator for
// supported GPUs. sm_20 code should work correctly, if
// suboptimally, on all newer GPUs.
if (GpuArchList.empty())
GpuArchList.push_back(CudaArch::SM_20);
return Error;
}
};
/// OpenMP action builder. The host bitcode is passed to the device frontend
/// and all the device linked images are passed to the host link phase.
class OpenMPActionBuilder final : public DeviceActionBuilder {
/// The OpenMP actions for the current input.
ActionList OpenMPDeviceActions;
/// The linker inputs obtained for each toolchain.
SmallVector<ActionList, 8> DeviceLinkerInputs;
public:
OpenMPActionBuilder(Compilation &C, DerivedArgList &Args,
const Driver::InputList &Inputs)
: DeviceActionBuilder(C, Args, Inputs, Action::OFK_OpenMP) {}
ActionBuilderReturnCode
getDeviceDependences(OffloadAction::DeviceDependences &DA,
phases::ID CurPhase, phases::ID FinalPhase,
PhasesTy &Phases) override {
// We should always have an action for each input.
assert(OpenMPDeviceActions.size() == ToolChains.size() &&
"Number of OpenMP actions and toolchains do not match.");
// The host only depends on device action in the linking phase, when all
// the device images have to be embedded in the host image.
if (CurPhase == phases::Link) {
assert(ToolChains.size() == DeviceLinkerInputs.size() &&
"Toolchains and linker inputs sizes do not match.");
auto LI = DeviceLinkerInputs.begin();
for (auto *A : OpenMPDeviceActions) {
LI->push_back(A);
++LI;
}
// We passed the device action as a host dependence, so we don't need to
// do anything else with them.
OpenMPDeviceActions.clear();
return ABRT_Success;
}
// By default, we produce an action for each device arch.
for (Action *&A : OpenMPDeviceActions)
A = C.getDriver().ConstructPhaseAction(C, Args, CurPhase, A);
return ABRT_Success;
}
ActionBuilderReturnCode addDeviceDepences(Action *HostAction) override {
// If this is an input action replicate it for each OpenMP toolchain.
if (auto *IA = dyn_cast<InputAction>(HostAction)) {
OpenMPDeviceActions.clear();
for (unsigned I = 0; I < ToolChains.size(); ++I)
OpenMPDeviceActions.push_back(
C.MakeAction<InputAction>(IA->getInputArg(), IA->getType()));
return ABRT_Success;
}
// If this is an unbundling action use it as is for each OpenMP toolchain.
if (auto *UA = dyn_cast<OffloadUnbundlingJobAction>(HostAction)) {
OpenMPDeviceActions.clear();
for (unsigned I = 0; I < ToolChains.size(); ++I) {
OpenMPDeviceActions.push_back(UA);
UA->registerDependentActionInfo(
ToolChains[I], /*BoundArch=*/StringRef(), Action::OFK_OpenMP);
}
return ABRT_Success;
}
// When generating code for OpenMP we use the host compile phase result as
// a dependence to the device compile phase so that it can learn what
// declarations should be emitted. However, this is not the only use for
// the host action, so we prevent it from being collapsed.
if (isa<CompileJobAction>(HostAction)) {
HostAction->setCannotBeCollapsedWithNextDependentAction();
assert(ToolChains.size() == OpenMPDeviceActions.size() &&
"Toolchains and device action sizes do not match.");
OffloadAction::HostDependence HDep(
*HostAction, *C.getSingleOffloadToolChain<Action::OFK_Host>(),
/*BoundArch=*/nullptr, Action::OFK_OpenMP);
auto TC = ToolChains.begin();
for (Action *&A : OpenMPDeviceActions) {
assert(isa<CompileJobAction>(A));
OffloadAction::DeviceDependences DDep;
DDep.add(*A, **TC, /*BoundArch=*/nullptr, Action::OFK_OpenMP);
A = C.MakeAction<OffloadAction>(HDep, DDep);
++TC;
}
}
return ABRT_Success;
}
void appendTopLevelActions(ActionList &AL) override {
if (OpenMPDeviceActions.empty())
return;
// We should always have an action for each input.
assert(OpenMPDeviceActions.size() == ToolChains.size() &&
"Number of OpenMP actions and toolchains do not match.");
// Append all device actions followed by the proper offload action.
auto TI = ToolChains.begin();
for (auto *A : OpenMPDeviceActions) {
OffloadAction::DeviceDependences Dep;
Dep.add(*A, **TI, /*BoundArch=*/nullptr, Action::OFK_OpenMP);
AL.push_back(C.MakeAction<OffloadAction>(Dep, A->getType()));
++TI;
}
// We no longer need the action stored in this builder.
OpenMPDeviceActions.clear();
}
void appendLinkDependences(OffloadAction::DeviceDependences &DA) override {
assert(ToolChains.size() == DeviceLinkerInputs.size() &&
"Toolchains and linker inputs sizes do not match.");
// Append a new link action for each device.
auto TC = ToolChains.begin();
for (auto &LI : DeviceLinkerInputs) {
auto *DeviceLinkAction =
C.MakeAction<LinkJobAction>(LI, types::TY_Image);
DA.add(*DeviceLinkAction, **TC, /*BoundArch=*/nullptr,
Action::OFK_OpenMP);
++TC;
}
}
bool initialize() override {
// Get the OpenMP toolchains. If we don't get any, the action builder will
// know there is nothing to do related to OpenMP offloading.
auto OpenMPTCRange = C.getOffloadToolChains<Action::OFK_OpenMP>();
for (auto TI = OpenMPTCRange.first, TE = OpenMPTCRange.second; TI != TE;
++TI)
ToolChains.push_back(TI->second);
DeviceLinkerInputs.resize(ToolChains.size());
return false;
}
bool canUseBundlerUnbundler() const override {
// OpenMP should use bundled files whenever possible.
return true;
}
};
///
/// TODO: Add the implementation for other specialized builders here.
///
/// Specialized builders being used by this offloading action builder.
SmallVector<DeviceActionBuilder *, 4> SpecializedBuilders;
/// Flag set to true if all valid builders allow file bundling/unbundling.
bool CanUseBundler;
public:
OffloadingActionBuilder(Compilation &C, DerivedArgList &Args,
const Driver::InputList &Inputs)
: C(C) {
// Create a specialized builder for each device toolchain.
IsValid = true;
// Create a specialized builder for CUDA.
SpecializedBuilders.push_back(new CudaActionBuilder(C, Args, Inputs));
// Create a specialized builder for OpenMP.
SpecializedBuilders.push_back(new OpenMPActionBuilder(C, Args, Inputs));
//
// TODO: Build other specialized builders here.
//
// Initialize all the builders, keeping track of errors. If all valid
// builders agree that we can use bundling, set the flag to true.
unsigned ValidBuilders = 0u;
unsigned ValidBuildersSupportingBundling = 0u;
for (auto *SB : SpecializedBuilders) {
IsValid = IsValid && !SB->initialize();
// Update the counters if the builder is valid.
if (SB->isValid()) {
++ValidBuilders;
if (SB->canUseBundlerUnbundler())
++ValidBuildersSupportingBundling;
}
}
CanUseBundler =
ValidBuilders && ValidBuilders == ValidBuildersSupportingBundling;
}
~OffloadingActionBuilder() {
for (auto *SB : SpecializedBuilders)
delete SB;
}
/// Generate an action that adds device dependences (if any) to a host action.
/// If no device dependence actions exist, just return the host action \a
/// HostAction. If an error is found or if no builder requires the host action
/// to be generated, return nullptr.
Action *
addDeviceDependencesToHostAction(Action *HostAction, const Arg *InputArg,
phases::ID CurPhase, phases::ID FinalPhase,
DeviceActionBuilder::PhasesTy &Phases) {
if (!IsValid)
return nullptr;
if (SpecializedBuilders.empty())
return HostAction;
assert(HostAction && "Invalid host action!");
OffloadAction::DeviceDependences DDeps;
// Check if all the programming models agree we should not emit the host
// action. Also, keep track of the offloading kinds employed.
auto &OffloadKind = InputArgToOffloadKindMap[InputArg];
unsigned InactiveBuilders = 0u;
unsigned IgnoringBuilders = 0u;
for (auto *SB : SpecializedBuilders) {
if (!SB->isValid()) {
++InactiveBuilders;
continue;
}
auto RetCode =
SB->getDeviceDependences(DDeps, CurPhase, FinalPhase, Phases);
// If the builder explicitly says the host action should be ignored,
// we need to increment the variable that tracks the builders that request
// the host object to be ignored.
if (RetCode == DeviceActionBuilder::ABRT_Ignore_Host)
++IgnoringBuilders;
// Unless the builder was inactive for this action, we have to record the
// offload kind because the host will have to use it.
if (RetCode != DeviceActionBuilder::ABRT_Inactive)
OffloadKind |= SB->getAssociatedOffloadKind();
[CUDA][OpenMP] Create generic offload action Summary: This patch replaces the CUDA specific action by a generic offload action. The offload action may have multiple dependences classier in “host” and “device”. The way this generic offloading action is used is very similar to what is done today by the CUDA implementation: it is used to set a specific toolchain and architecture to its dependences during the generation of jobs. This patch also proposes propagating the offloading information through the action graph so that that information can be easily retrieved at any time during the generation of commands. This allows e.g. the "clang tool” to evaluate whether CUDA should be supported for the device or host and ptas to easily retrieve the target architecture. This is an example of how the action graphs would look like (compilation of a single CUDA file with two GPU architectures) ``` 0: input, "cudatests.cu", cuda, (host-cuda) 1: preprocessor, {0}, cuda-cpp-output, (host-cuda) 2: compiler, {1}, ir, (host-cuda) 3: input, "cudatests.cu", cuda, (device-cuda, sm_35) 4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_35) 5: compiler, {4}, ir, (device-cuda, sm_35) 6: backend, {5}, assembler, (device-cuda, sm_35) 7: assembler, {6}, object, (device-cuda, sm_35) 8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {7}, object 9: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {6}, assembler 10: input, "cudatests.cu", cuda, (device-cuda, sm_37) 11: preprocessor, {10}, cuda-cpp-output, (device-cuda, sm_37) 12: compiler, {11}, ir, (device-cuda, sm_37) 13: backend, {12}, assembler, (device-cuda, sm_37) 14: assembler, {13}, object, (device-cuda, sm_37) 15: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {14}, object 16: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {13}, assembler 17: linker, {8, 9, 15, 16}, cuda-fatbin, (device-cuda) 18: offload, "host-cuda (powerpc64le-unknown-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda)" {17}, ir 19: backend, {18}, assembler 20: assembler, {19}, object 21: input, "cuda", object 22: input, "cudart", object 23: linker, {20, 21, 22}, image ``` The changes in this patch pass the existent regression tests (keeps the existent functionality) and resulting binaries execute correctly in a Power8+K40 machine. Reviewers: echristo, hfinkel, jlebar, ABataev, tra Subscribers: guansong, andreybokhanko, tcramer, mkuron, cfe-commits, arpith-jacob, carlo.bertolli, caomhin Differential Revision: https://reviews.llvm.org/D18171 llvm-svn: 275645
2016-07-16 07:13:27 +08:00
}
// If all builders agree that the host object should be ignored, just return
// nullptr.
if (IgnoringBuilders &&
SpecializedBuilders.size() == (InactiveBuilders + IgnoringBuilders))
return nullptr;
if (DDeps.getActions().empty())
return HostAction;
// We have dependences we need to bundle together. We use an offload action
// for that.
OffloadAction::HostDependence HDep(
*HostAction, *C.getSingleOffloadToolChain<Action::OFK_Host>(),
/*BoundArch=*/nullptr, DDeps);
return C.MakeAction<OffloadAction>(HDep, DDeps);
}
/// Generate an action that adds a host dependence to a device action. The
/// results will be kept in this action builder. Return true if an error was
/// found.
bool addHostDependenceToDeviceActions(Action *&HostAction,
const Arg *InputArg) {
if (!IsValid)
return true;
// If we are supporting bundling/unbundling and the current action is an
// input action of non-source file, we replace the host action by the
// unbundling action. The bundler tool has the logic to detect if an input
// is a bundle or not and if the input is not a bundle it assumes it is a
// host file. Therefore it is safe to create an unbundling action even if
// the input is not a bundle.
if (CanUseBundler && isa<InputAction>(HostAction) &&
InputArg->getOption().getKind() == llvm::opt::Option::InputClass &&
!types::isSrcFile(HostAction->getType())) {
auto UnbundlingHostAction =
C.MakeAction<OffloadUnbundlingJobAction>(HostAction);
UnbundlingHostAction->registerDependentActionInfo(
C.getSingleOffloadToolChain<Action::OFK_Host>(),
/*BoundArch=*/StringRef(), Action::OFK_Host);
HostAction = UnbundlingHostAction;
}
assert(HostAction && "Invalid host action!");
// Register the offload kinds that are used.
auto &OffloadKind = InputArgToOffloadKindMap[InputArg];
for (auto *SB : SpecializedBuilders) {
if (!SB->isValid())
continue;
auto RetCode = SB->addDeviceDepences(HostAction);
// Host dependences for device actions are not compatible with that same
// action being ignored.
assert(RetCode != DeviceActionBuilder::ABRT_Ignore_Host &&
"Host dependence not expected to be ignored.!");
// Unless the builder was inactive for this action, we have to record the
// offload kind because the host will have to use it.
if (RetCode != DeviceActionBuilder::ABRT_Inactive)
OffloadKind |= SB->getAssociatedOffloadKind();
}
return false;
}
[CUDA][OpenMP] Create generic offload action Summary: This patch replaces the CUDA specific action by a generic offload action. The offload action may have multiple dependences classier in “host” and “device”. The way this generic offloading action is used is very similar to what is done today by the CUDA implementation: it is used to set a specific toolchain and architecture to its dependences during the generation of jobs. This patch also proposes propagating the offloading information through the action graph so that that information can be easily retrieved at any time during the generation of commands. This allows e.g. the "clang tool” to evaluate whether CUDA should be supported for the device or host and ptas to easily retrieve the target architecture. This is an example of how the action graphs would look like (compilation of a single CUDA file with two GPU architectures) ``` 0: input, "cudatests.cu", cuda, (host-cuda) 1: preprocessor, {0}, cuda-cpp-output, (host-cuda) 2: compiler, {1}, ir, (host-cuda) 3: input, "cudatests.cu", cuda, (device-cuda, sm_35) 4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_35) 5: compiler, {4}, ir, (device-cuda, sm_35) 6: backend, {5}, assembler, (device-cuda, sm_35) 7: assembler, {6}, object, (device-cuda, sm_35) 8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {7}, object 9: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {6}, assembler 10: input, "cudatests.cu", cuda, (device-cuda, sm_37) 11: preprocessor, {10}, cuda-cpp-output, (device-cuda, sm_37) 12: compiler, {11}, ir, (device-cuda, sm_37) 13: backend, {12}, assembler, (device-cuda, sm_37) 14: assembler, {13}, object, (device-cuda, sm_37) 15: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {14}, object 16: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {13}, assembler 17: linker, {8, 9, 15, 16}, cuda-fatbin, (device-cuda) 18: offload, "host-cuda (powerpc64le-unknown-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda)" {17}, ir 19: backend, {18}, assembler 20: assembler, {19}, object 21: input, "cuda", object 22: input, "cudart", object 23: linker, {20, 21, 22}, image ``` The changes in this patch pass the existent regression tests (keeps the existent functionality) and resulting binaries execute correctly in a Power8+K40 machine. Reviewers: echristo, hfinkel, jlebar, ABataev, tra Subscribers: guansong, andreybokhanko, tcramer, mkuron, cfe-commits, arpith-jacob, carlo.bertolli, caomhin Differential Revision: https://reviews.llvm.org/D18171 llvm-svn: 275645
2016-07-16 07:13:27 +08:00
/// Add the offloading top level actions to the provided action list. This
/// function can replace the host action by a bundling action if the
/// programming models allow it.
bool appendTopLevelActions(ActionList &AL, Action *HostAction,
const Arg *InputArg) {
// Get the device actions to be appended.
ActionList OffloadAL;
for (auto *SB : SpecializedBuilders) {
if (!SB->isValid())
continue;
SB->appendTopLevelActions(OffloadAL);
}
// If we can use the bundler, replace the host action by the bundling one in
// the resulting list. Otherwise, just append the device actions.
if (CanUseBundler && !OffloadAL.empty()) {
// Add the host action to the list in order to create the bundling action.
OffloadAL.push_back(HostAction);
// We expect that the host action was just appended to the action list
// before this method was called.
assert(HostAction == AL.back() && "Host action not in the list??");
HostAction = C.MakeAction<OffloadBundlingJobAction>(OffloadAL);
AL.back() = HostAction;
} else
AL.append(OffloadAL.begin(), OffloadAL.end());
// Propagate to the current host action (if any) the offload information
// associated with the current input.
if (HostAction)
HostAction->propagateHostOffloadInfo(InputArgToOffloadKindMap[InputArg],
/*BoundArch=*/nullptr);
return false;
}
/// Processes the host linker action. This currently consists of replacing it
/// with an offload action if there are device link objects and propagate to
/// the host action all the offload kinds used in the current compilation. The
/// resulting action is returned.
Action *processHostLinkAction(Action *HostAction) {
// Add all the dependences from the device linking actions.
OffloadAction::DeviceDependences DDeps;
for (auto *SB : SpecializedBuilders) {
if (!SB->isValid())
continue;
SB->appendLinkDependences(DDeps);
}
// Calculate all the offload kinds used in the current compilation.
unsigned ActiveOffloadKinds = 0u;
for (auto &I : InputArgToOffloadKindMap)
ActiveOffloadKinds |= I.second;
// If we don't have device dependencies, we don't have to create an offload
// action.
if (DDeps.getActions().empty()) {
// Propagate all the active kinds to host action. Given that it is a link
// action it is assumed to depend on all actions generated so far.
HostAction->propagateHostOffloadInfo(ActiveOffloadKinds,
/*BoundArch=*/nullptr);
return HostAction;
}
// Create the offload action with all dependences. When an offload action
// is created the kinds are propagated to the host action, so we don't have
// to do that explicitly here.
OffloadAction::HostDependence HDep(
*HostAction, *C.getSingleOffloadToolChain<Action::OFK_Host>(),
/*BoundArch*/ nullptr, ActiveOffloadKinds);
return C.MakeAction<OffloadAction>(HDep, DDeps);
}
};
} // anonymous namespace.
void Driver::BuildActions(Compilation &C, DerivedArgList &Args,
const InputList &Inputs, ActionList &Actions) const {
llvm::PrettyStackTraceString CrashInfo("Building compilation actions");
if (!SuppressMissingInputWarning && Inputs.empty()) {
Diag(clang::diag::err_drv_no_input_files);
return;
}
Arg *FinalPhaseArg;
phases::ID FinalPhase = getFinalPhase(Args, &FinalPhaseArg);
if (FinalPhase == phases::Link) {
if (Args.hasArg(options::OPT_emit_llvm))
Diag(clang::diag::err_drv_emit_llvm_link);
if (IsCLMode() && LTOMode != LTOK_None &&
!Args.getLastArgValue(options::OPT_fuse_ld_EQ).equals_lower("lld"))
Diag(clang::diag::err_drv_lto_without_lld);
}
// Reject -Z* at the top level, these options should never have been exposed
// by gcc.
if (Arg *A = Args.getLastArg(options::OPT_Z_Joined))
Diag(clang::diag::err_drv_use_of_Z_option) << A->getAsString(Args);
// Diagnose misuse of /Fo.
if (Arg *A = Args.getLastArg(options::OPT__SLASH_Fo)) {
StringRef V = A->getValue();
if (Inputs.size() > 1 && !V.empty() &&
!llvm::sys::path::is_separator(V.back())) {
// Check whether /Fo tries to name an output file for multiple inputs.
Diag(clang::diag::err_drv_out_file_argument_with_multiple_sources)
<< A->getSpelling() << V;
Args.eraseArg(options::OPT__SLASH_Fo);
}
}
// Diagnose misuse of /Fa.
if (Arg *A = Args.getLastArg(options::OPT__SLASH_Fa)) {
StringRef V = A->getValue();
if (Inputs.size() > 1 && !V.empty() &&
!llvm::sys::path::is_separator(V.back())) {
// Check whether /Fa tries to name an asm file for multiple inputs.
Diag(clang::diag::err_drv_out_file_argument_with_multiple_sources)
<< A->getSpelling() << V;
Args.eraseArg(options::OPT__SLASH_Fa);
}
}
// Diagnose misuse of /o.
if (Arg *A = Args.getLastArg(options::OPT__SLASH_o)) {
if (A->getValue()[0] == '\0') {
// It has to have a value.
Diag(clang::diag::err_drv_missing_argument) << A->getSpelling() << 1;
Args.eraseArg(options::OPT__SLASH_o);
}
}
clang-cl: Implement initial limited support for precompiled headers. In the gcc precompiled header model, one explicitly runs clang with `-x c++-header` on a .h file to produce a gch file, and then includes the header with `-include foo.h` and if a .gch file exists for that header it gets used. This is documented at http://clang.llvm.org/docs/UsersManual.html#precompiled-headers cl.exe's model is fairly different, and controlled by the two flags /Yc and /Yu. A pch file is generated as a side effect of a regular compilation when /Ycheader.h is passed. While the compilation is running, the compiler keeps track of #include lines in the main translation unit and writes everything up to an `#include "header.h"` line into a pch file. Conversely, /Yuheader.h tells the compiler to skip all code in the main TU up to and including `#include "header.h"` and instead load header.pch. (It's also possible to use /Yc and /Yu without an argument, in that case a `#pragma hrdstop` takes the role of controlling the point where pch ends and real code begins.) This patch implements limited support for this in that it requires the pch header to be passed as a /FI force include flag – with this restriction, it can be implemented almost completely in the driver with fairly small amounts of code. For /Yu, this is trivial, and for /Yc a separate pch action is added that runs before the actual compilation. After r261774, the first failing command makes a compilation stop – this means if the pch fails to build the main compilation won't run, which is what we want. However, in /fallback builds we need to run the main compilation even if the pch build fails so that the main compilation's fallback can run. To achieve this, add a ForceSuccessCommand that pretends that the pch build always succeeded in /fallback builds (the main compilation will then fail to open the pch and run the fallback cl.exe invocation). If /Yc /Yu are used in a setup that clang-cl doesn't implement yet, clang-cl will now emit a "not implemented yet; flag ignored" warning that can be disabled using -Wno-clang-cl-pch. Since clang-cl doesn't yet serialize some important things (most notably `pragma comment(lib, ...)`, this feature is disabled by default and only enabled by an internal driver flag. Once it's more stable, this internal flag will disappear. (The default stdafx.h setup passes stdafx.h as explicit argument to /Yc but not as /FI – instead every single TU has to `#include <stdafx.h>` as first thing it does. Implementing support for this should be possible with the approach in this patch with minimal frontend changes by passing a --stop-at / --start-at flag from the driver to the frontend. This is left for a follow-up. I don't think we ever want to support `#pragma hdrstop`, and supporting it with this approach isn't easy: This approach relies on the driver knowing the pch filename in advance, and `#pragma hdrstop(out.pch)` can set the output filename, so the driver can't know about it in advance.) clang-cl now also honors /Fp and puts pch files in the same spot that cl.exe would put them, but the pch file format is of course incompatible. This has ramifications on /fallback, so /Yc /Yu aren't passed through to cl.exe in /fallback builds. http://reviews.llvm.org/D17695 llvm-svn: 262420
2016-03-02 07:16:44 +08:00
// Diagnose unsupported forms of /Yc /Yu. Ignore /Yc/Yu for now if:
// * no filename after it
// * both /Yc and /Yu passed but with different filenames
// * corresponding file not also passed as /FI
Arg *YcArg = Args.getLastArg(options::OPT__SLASH_Yc);
Arg *YuArg = Args.getLastArg(options::OPT__SLASH_Yu);
if (YcArg && YcArg->getValue()[0] == '\0') {
Diag(clang::diag::warn_drv_ycyu_no_arg_clang_cl) << YcArg->getSpelling();
Args.eraseArg(options::OPT__SLASH_Yc);
YcArg = nullptr;
}
if (YuArg && YuArg->getValue()[0] == '\0') {
Diag(clang::diag::warn_drv_ycyu_no_arg_clang_cl) << YuArg->getSpelling();
Args.eraseArg(options::OPT__SLASH_Yu);
YuArg = nullptr;
}
if (YcArg && YuArg && strcmp(YcArg->getValue(), YuArg->getValue()) != 0) {
Diag(clang::diag::warn_drv_ycyu_different_arg_clang_cl);
Args.eraseArg(options::OPT__SLASH_Yc);
Args.eraseArg(options::OPT__SLASH_Yu);
YcArg = YuArg = nullptr;
}
if (YcArg || YuArg) {
StringRef Val = YcArg ? YcArg->getValue() : YuArg->getValue();
bool FoundMatchingInclude = false;
for (const Arg *Inc : Args.filtered(options::OPT_include)) {
// FIXME: Do case-insensitive matching and consider / and \ as equal.
if (Inc->getValue() == Val)
FoundMatchingInclude = true;
}
if (!FoundMatchingInclude) {
Diag(clang::diag::warn_drv_ycyu_no_fi_arg_clang_cl)
<< (YcArg ? YcArg : YuArg)->getSpelling();
Args.eraseArg(options::OPT__SLASH_Yc);
Args.eraseArg(options::OPT__SLASH_Yu);
YcArg = YuArg = nullptr;
}
}
if (YcArg && Inputs.size() > 1) {
Diag(clang::diag::warn_drv_yc_multiple_inputs_clang_cl);
Args.eraseArg(options::OPT__SLASH_Yc);
YcArg = nullptr;
}
if (Args.hasArg(options::OPT__SLASH_Y_)) {
// /Y- disables all pch handling. Rather than check for it everywhere,
// just remove clang-cl pch-related flags here.
Args.eraseArg(options::OPT__SLASH_Fp);
Args.eraseArg(options::OPT__SLASH_Yc);
Args.eraseArg(options::OPT__SLASH_Yu);
YcArg = YuArg = nullptr;
}
// Builder to be used to build offloading actions.
OffloadingActionBuilder OffloadBuilder(C, Args, Inputs);
[CUDA][OpenMP] Create generic offload action Summary: This patch replaces the CUDA specific action by a generic offload action. The offload action may have multiple dependences classier in “host” and “device”. The way this generic offloading action is used is very similar to what is done today by the CUDA implementation: it is used to set a specific toolchain and architecture to its dependences during the generation of jobs. This patch also proposes propagating the offloading information through the action graph so that that information can be easily retrieved at any time during the generation of commands. This allows e.g. the "clang tool” to evaluate whether CUDA should be supported for the device or host and ptas to easily retrieve the target architecture. This is an example of how the action graphs would look like (compilation of a single CUDA file with two GPU architectures) ``` 0: input, "cudatests.cu", cuda, (host-cuda) 1: preprocessor, {0}, cuda-cpp-output, (host-cuda) 2: compiler, {1}, ir, (host-cuda) 3: input, "cudatests.cu", cuda, (device-cuda, sm_35) 4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_35) 5: compiler, {4}, ir, (device-cuda, sm_35) 6: backend, {5}, assembler, (device-cuda, sm_35) 7: assembler, {6}, object, (device-cuda, sm_35) 8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {7}, object 9: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {6}, assembler 10: input, "cudatests.cu", cuda, (device-cuda, sm_37) 11: preprocessor, {10}, cuda-cpp-output, (device-cuda, sm_37) 12: compiler, {11}, ir, (device-cuda, sm_37) 13: backend, {12}, assembler, (device-cuda, sm_37) 14: assembler, {13}, object, (device-cuda, sm_37) 15: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {14}, object 16: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {13}, assembler 17: linker, {8, 9, 15, 16}, cuda-fatbin, (device-cuda) 18: offload, "host-cuda (powerpc64le-unknown-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda)" {17}, ir 19: backend, {18}, assembler 20: assembler, {19}, object 21: input, "cuda", object 22: input, "cudart", object 23: linker, {20, 21, 22}, image ``` The changes in this patch pass the existent regression tests (keeps the existent functionality) and resulting binaries execute correctly in a Power8+K40 machine. Reviewers: echristo, hfinkel, jlebar, ABataev, tra Subscribers: guansong, andreybokhanko, tcramer, mkuron, cfe-commits, arpith-jacob, carlo.bertolli, caomhin Differential Revision: https://reviews.llvm.org/D18171 llvm-svn: 275645
2016-07-16 07:13:27 +08:00
// Construct the actions to perform.
ActionList LinkerInputs;
llvm::SmallVector<phases::ID, phases::MaxNumberOfPhases> PL;
for (auto &I : Inputs) {
types::ID InputType = I.first;
const Arg *InputArg = I.second;
PL.clear();
types::getCompilationPhases(InputType, PL);
// If the first step comes after the final phase we are doing as part of
// this compilation, warn the user about it.
phases::ID InitialPhase = PL[0];
if (InitialPhase > FinalPhase) {
// Claim here to avoid the more general unused warning.
InputArg->claim();
// Suppress all unused style warnings with -Qunused-arguments
if (Args.hasArg(options::OPT_Qunused_arguments))
continue;
// Special case when final phase determined by binary name, rather than
// by a command-line argument with a corresponding Arg.
if (CCCIsCPP())
Diag(clang::diag::warn_drv_input_file_unused_by_cpp)
<< InputArg->getAsString(Args) << getPhaseName(InitialPhase);
// Special case '-E' warning on a previously preprocessed file to make
// more sense.
else if (InitialPhase == phases::Compile &&
FinalPhase == phases::Preprocess &&
getPreprocessedType(InputType) == types::TY_INVALID)
Diag(clang::diag::warn_drv_preprocessed_input_file_unused)
<< InputArg->getAsString(Args) << !!FinalPhaseArg
<< (FinalPhaseArg ? FinalPhaseArg->getOption().getName() : "");
else
Diag(clang::diag::warn_drv_input_file_unused)
<< InputArg->getAsString(Args) << getPhaseName(InitialPhase)
<< !!FinalPhaseArg
<< (FinalPhaseArg ? FinalPhaseArg->getOption().getName() : "");
continue;
}
if (YcArg) {
// Add a separate precompile phase for the compile phase.
if (FinalPhase >= phases::Compile) {
const types::ID HeaderType = lookupHeaderTypeForSourceType(InputType);
llvm::SmallVector<phases::ID, phases::MaxNumberOfPhases> PCHPL;
types::getCompilationPhases(HeaderType, PCHPL);
Arg *PchInputArg = MakeInputArg(Args, *Opts, YcArg->getValue());
// Build the pipeline for the pch file.
Action *ClangClPch =
C.MakeAction<InputAction>(*PchInputArg, HeaderType);
for (phases::ID Phase : PCHPL)
ClangClPch = ConstructPhaseAction(C, Args, Phase, ClangClPch);
assert(ClangClPch);
Actions.push_back(ClangClPch);
// The driver currently exits after the first failed command. This
// relies on that behavior, to make sure if the pch generation fails,
// the main compilation won't run.
}
}
// Build the pipeline for this file.
Action *Current = C.MakeAction<InputAction>(*InputArg, InputType);
// Use the current host action in any of the offloading actions, if
// required.
if (OffloadBuilder.addHostDependenceToDeviceActions(Current, InputArg))
break;
for (SmallVectorImpl<phases::ID>::iterator i = PL.begin(), e = PL.end();
i != e; ++i) {
phases::ID Phase = *i;
// We are done if this step is past what the user requested.
if (Phase > FinalPhase)
break;
// Add any offload action the host action depends on.
Current = OffloadBuilder.addDeviceDependencesToHostAction(
Current, InputArg, Phase, FinalPhase, PL);
if (!Current)
break;
// Queue linker inputs.
if (Phase == phases::Link) {
assert((i + 1) == e && "linking must be final compilation step.");
LinkerInputs.push_back(Current);
Current = nullptr;
break;
}
// Otherwise construct the appropriate action.
auto *NewCurrent = ConstructPhaseAction(C, Args, Phase, Current);
// We didn't create a new action, so we will just move to the next phase.
if (NewCurrent == Current)
continue;
[CUDA][OpenMP] Create generic offload action Summary: This patch replaces the CUDA specific action by a generic offload action. The offload action may have multiple dependences classier in “host” and “device”. The way this generic offloading action is used is very similar to what is done today by the CUDA implementation: it is used to set a specific toolchain and architecture to its dependences during the generation of jobs. This patch also proposes propagating the offloading information through the action graph so that that information can be easily retrieved at any time during the generation of commands. This allows e.g. the "clang tool” to evaluate whether CUDA should be supported for the device or host and ptas to easily retrieve the target architecture. This is an example of how the action graphs would look like (compilation of a single CUDA file with two GPU architectures) ``` 0: input, "cudatests.cu", cuda, (host-cuda) 1: preprocessor, {0}, cuda-cpp-output, (host-cuda) 2: compiler, {1}, ir, (host-cuda) 3: input, "cudatests.cu", cuda, (device-cuda, sm_35) 4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_35) 5: compiler, {4}, ir, (device-cuda, sm_35) 6: backend, {5}, assembler, (device-cuda, sm_35) 7: assembler, {6}, object, (device-cuda, sm_35) 8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {7}, object 9: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {6}, assembler 10: input, "cudatests.cu", cuda, (device-cuda, sm_37) 11: preprocessor, {10}, cuda-cpp-output, (device-cuda, sm_37) 12: compiler, {11}, ir, (device-cuda, sm_37) 13: backend, {12}, assembler, (device-cuda, sm_37) 14: assembler, {13}, object, (device-cuda, sm_37) 15: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {14}, object 16: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {13}, assembler 17: linker, {8, 9, 15, 16}, cuda-fatbin, (device-cuda) 18: offload, "host-cuda (powerpc64le-unknown-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda)" {17}, ir 19: backend, {18}, assembler 20: assembler, {19}, object 21: input, "cuda", object 22: input, "cudart", object 23: linker, {20, 21, 22}, image ``` The changes in this patch pass the existent regression tests (keeps the existent functionality) and resulting binaries execute correctly in a Power8+K40 machine. Reviewers: echristo, hfinkel, jlebar, ABataev, tra Subscribers: guansong, andreybokhanko, tcramer, mkuron, cfe-commits, arpith-jacob, carlo.bertolli, caomhin Differential Revision: https://reviews.llvm.org/D18171 llvm-svn: 275645
2016-07-16 07:13:27 +08:00
Current = NewCurrent;
// Use the current host action in any of the offloading actions, if
// required.
if (OffloadBuilder.addHostDependenceToDeviceActions(Current, InputArg))
break;
if (Current->getType() == types::TY_Nothing)
break;
}
// If we ended with something, add to the output list.
if (Current)
Actions.push_back(Current);
// Add any top level actions generated for offloading.
OffloadBuilder.appendTopLevelActions(Actions, Current, InputArg);
}
// Add a link action if necessary.
[CUDA][OpenMP] Create generic offload action Summary: This patch replaces the CUDA specific action by a generic offload action. The offload action may have multiple dependences classier in “host” and “device”. The way this generic offloading action is used is very similar to what is done today by the CUDA implementation: it is used to set a specific toolchain and architecture to its dependences during the generation of jobs. This patch also proposes propagating the offloading information through the action graph so that that information can be easily retrieved at any time during the generation of commands. This allows e.g. the "clang tool” to evaluate whether CUDA should be supported for the device or host and ptas to easily retrieve the target architecture. This is an example of how the action graphs would look like (compilation of a single CUDA file with two GPU architectures) ``` 0: input, "cudatests.cu", cuda, (host-cuda) 1: preprocessor, {0}, cuda-cpp-output, (host-cuda) 2: compiler, {1}, ir, (host-cuda) 3: input, "cudatests.cu", cuda, (device-cuda, sm_35) 4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_35) 5: compiler, {4}, ir, (device-cuda, sm_35) 6: backend, {5}, assembler, (device-cuda, sm_35) 7: assembler, {6}, object, (device-cuda, sm_35) 8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {7}, object 9: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {6}, assembler 10: input, "cudatests.cu", cuda, (device-cuda, sm_37) 11: preprocessor, {10}, cuda-cpp-output, (device-cuda, sm_37) 12: compiler, {11}, ir, (device-cuda, sm_37) 13: backend, {12}, assembler, (device-cuda, sm_37) 14: assembler, {13}, object, (device-cuda, sm_37) 15: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {14}, object 16: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {13}, assembler 17: linker, {8, 9, 15, 16}, cuda-fatbin, (device-cuda) 18: offload, "host-cuda (powerpc64le-unknown-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda)" {17}, ir 19: backend, {18}, assembler 20: assembler, {19}, object 21: input, "cuda", object 22: input, "cudart", object 23: linker, {20, 21, 22}, image ``` The changes in this patch pass the existent regression tests (keeps the existent functionality) and resulting binaries execute correctly in a Power8+K40 machine. Reviewers: echristo, hfinkel, jlebar, ABataev, tra Subscribers: guansong, andreybokhanko, tcramer, mkuron, cfe-commits, arpith-jacob, carlo.bertolli, caomhin Differential Revision: https://reviews.llvm.org/D18171 llvm-svn: 275645
2016-07-16 07:13:27 +08:00
if (!LinkerInputs.empty()) {
Action *LA = C.MakeAction<LinkJobAction>(LinkerInputs, types::TY_Image);
LA = OffloadBuilder.processHostLinkAction(LA);
Actions.push_back(LA);
[CUDA][OpenMP] Create generic offload action Summary: This patch replaces the CUDA specific action by a generic offload action. The offload action may have multiple dependences classier in “host” and “device”. The way this generic offloading action is used is very similar to what is done today by the CUDA implementation: it is used to set a specific toolchain and architecture to its dependences during the generation of jobs. This patch also proposes propagating the offloading information through the action graph so that that information can be easily retrieved at any time during the generation of commands. This allows e.g. the "clang tool” to evaluate whether CUDA should be supported for the device or host and ptas to easily retrieve the target architecture. This is an example of how the action graphs would look like (compilation of a single CUDA file with two GPU architectures) ``` 0: input, "cudatests.cu", cuda, (host-cuda) 1: preprocessor, {0}, cuda-cpp-output, (host-cuda) 2: compiler, {1}, ir, (host-cuda) 3: input, "cudatests.cu", cuda, (device-cuda, sm_35) 4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_35) 5: compiler, {4}, ir, (device-cuda, sm_35) 6: backend, {5}, assembler, (device-cuda, sm_35) 7: assembler, {6}, object, (device-cuda, sm_35) 8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {7}, object 9: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {6}, assembler 10: input, "cudatests.cu", cuda, (device-cuda, sm_37) 11: preprocessor, {10}, cuda-cpp-output, (device-cuda, sm_37) 12: compiler, {11}, ir, (device-cuda, sm_37) 13: backend, {12}, assembler, (device-cuda, sm_37) 14: assembler, {13}, object, (device-cuda, sm_37) 15: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {14}, object 16: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {13}, assembler 17: linker, {8, 9, 15, 16}, cuda-fatbin, (device-cuda) 18: offload, "host-cuda (powerpc64le-unknown-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda)" {17}, ir 19: backend, {18}, assembler 20: assembler, {19}, object 21: input, "cuda", object 22: input, "cudart", object 23: linker, {20, 21, 22}, image ``` The changes in this patch pass the existent regression tests (keeps the existent functionality) and resulting binaries execute correctly in a Power8+K40 machine. Reviewers: echristo, hfinkel, jlebar, ABataev, tra Subscribers: guansong, andreybokhanko, tcramer, mkuron, cfe-commits, arpith-jacob, carlo.bertolli, caomhin Differential Revision: https://reviews.llvm.org/D18171 llvm-svn: 275645
2016-07-16 07:13:27 +08:00
}
// If we are linking, claim any options which are obviously only used for
// compilation.
if (FinalPhase == phases::Link && PL.size() == 1) {
Args.ClaimAllArgs(options::OPT_CompileOnly_Group);
Args.ClaimAllArgs(options::OPT_cl_compile_Group);
}
// Claim ignored clang-cl options.
Args.ClaimAllArgs(options::OPT_cl_ignored_Group);
// Claim --cuda-host-only and --cuda-compile-host-device, which may be passed
// to non-CUDA compilations and should not trigger warnings there.
Args.ClaimAllArgs(options::OPT_cuda_host_only);
Args.ClaimAllArgs(options::OPT_cuda_compile_host_device);
}
Action *Driver::ConstructPhaseAction(Compilation &C, const ArgList &Args,
phases::ID Phase, Action *Input) const {
llvm::PrettyStackTraceString CrashInfo("Constructing phase actions");
// Some types skip the assembler phase (e.g., llvm-bc), but we can't
// encode this in the steps because the intermediate type depends on
// arguments. Just special case here.
if (Phase == phases::Assemble && Input->getType() != types::TY_PP_Asm)
return Input;
// Build the appropriate action.
switch (Phase) {
case phases::Link:
llvm_unreachable("link action invalid here.");
case phases::Preprocess: {
types::ID OutputTy;
// -{M, MM} alter the output type.
if (Args.hasArg(options::OPT_M, options::OPT_MM)) {
OutputTy = types::TY_Dependencies;
} else {
OutputTy = Input->getType();
if (!Args.hasFlag(options::OPT_frewrite_includes,
options::OPT_fno_rewrite_includes, false) &&
!CCGenDiagnostics)
OutputTy = types::getPreprocessedType(OutputTy);
assert(OutputTy != types::TY_INVALID &&
"Cannot preprocess this input type!");
}
return C.MakeAction<PreprocessJobAction>(Input, OutputTy);
}
case phases::Precompile: {
Unrevert r280035 now that the clang-cl bug it exposed has been fixed by r280133. Original commit message: C++ Modules TS: driver support for building modules. This works as follows: we add --precompile to the existing gamut of options for specifying how far to go when compiling an input (-E, -c, -S, etc.). This flag specifies that an input is taken to the precompilation step and no further, and this can be specified when building a .pcm from a module interface or when building a .pch from a header file. The .cppm extension (and some related extensions) are implicitly recognized as C++ module interface files. If --precompile is /not/ specified, the file is compiled (via a .pcm) to a .o file containing the code for the module (and then potentially also assembled and linked, if -S, -c, etc. are not specified). We do not yet suppress the emission of object code for other users of the module interface, so for now this will only work if everything in the .cppm file has vague linkage. As with the existing support for module-map modules, prebuilt modules can be provided as compiler inputs either via the -fmodule-file= command-line argument or via files named ModuleName.pcm in one of the directories specified via -fprebuilt-module-path=. This also exposes the -fmodules-ts cc1 flag in the driver. This is still experimental, and in particular, the concrete syntax is subject to change as the Modules TS evolves in the C++ committee. Unlike -fmodules, this flag does not enable support for implicitly loading module maps nor building modules via the module cache, but those features can be turned on separately and used in conjunction with the Modules TS support. llvm-svn: 280134
2016-08-31 03:06:26 +08:00
types::ID OutputTy = getPrecompiledType(Input->getType());
assert(OutputTy != types::TY_INVALID &&
"Cannot precompile this input type!");
if (Args.hasArg(options::OPT_fsyntax_only)) {
// Syntax checks should not emit a PCH file
OutputTy = types::TY_Nothing;
}
return C.MakeAction<PrecompileJobAction>(Input, OutputTy);
}
case phases::Compile: {
if (Args.hasArg(options::OPT_fsyntax_only))
return C.MakeAction<CompileJobAction>(Input, types::TY_Nothing);
if (Args.hasArg(options::OPT_rewrite_objc))
return C.MakeAction<CompileJobAction>(Input, types::TY_RewrittenObjC);
if (Args.hasArg(options::OPT_rewrite_legacy_objc))
return C.MakeAction<CompileJobAction>(Input,
types::TY_RewrittenLegacyObjC);
if (Args.hasArg(options::OPT__analyze, options::OPT__analyze_auto))
return C.MakeAction<AnalyzeJobAction>(Input, types::TY_Plist);
if (Args.hasArg(options::OPT__migrate))
return C.MakeAction<MigrateJobAction>(Input, types::TY_Remap);
if (Args.hasArg(options::OPT_emit_ast))
return C.MakeAction<CompileJobAction>(Input, types::TY_AST);
if (Args.hasArg(options::OPT_module_file_info))
return C.MakeAction<CompileJobAction>(Input, types::TY_ModuleFile);
if (Args.hasArg(options::OPT_verify_pch))
return C.MakeAction<VerifyPCHJobAction>(Input, types::TY_Nothing);
return C.MakeAction<CompileJobAction>(Input, types::TY_LLVM_BC);
}
case phases::Backend: {
if (isUsingLTO()) {
types::ID Output =
Args.hasArg(options::OPT_S) ? types::TY_LTO_IR : types::TY_LTO_BC;
return C.MakeAction<BackendJobAction>(Input, Output);
}
if (Args.hasArg(options::OPT_emit_llvm)) {
types::ID Output =
Args.hasArg(options::OPT_S) ? types::TY_LLVM_IR : types::TY_LLVM_BC;
return C.MakeAction<BackendJobAction>(Input, Output);
}
return C.MakeAction<BackendJobAction>(Input, types::TY_PP_Asm);
}
case phases::Assemble:
return C.MakeAction<AssembleJobAction>(std::move(Input), types::TY_Object);
}
llvm_unreachable("invalid phase in ConstructPhaseAction");
}
void Driver::BuildJobs(Compilation &C) const {
llvm::PrettyStackTraceString CrashInfo("Building compilation jobs");
Arg *FinalOutput = C.getArgs().getLastArg(options::OPT_o);
// It is an error to provide a -o option if we are making multiple output
// files.
if (FinalOutput) {
unsigned NumOutputs = 0;
for (const Action *A : C.getActions())
if (A->getType() != types::TY_Nothing)
++NumOutputs;
if (NumOutputs > 1) {
Diag(clang::diag::err_drv_output_argument_with_multiple_files);
FinalOutput = nullptr;
}
}
// Collect the list of architectures.
llvm::StringSet<> ArchNames;
if (C.getDefaultToolChain().getTriple().isOSBinFormatMachO())
for (const Arg *A : C.getArgs())
if (A->getOption().matches(options::OPT_arch))
ArchNames.insert(A->getValue());
// Set of (Action, canonical ToolChain triple) pairs we've built jobs for.
std::map<std::pair<const Action *, std::string>, InputInfo> CachedResults;
for (Action *A : C.getActions()) {
// If we are linking an image for multiple archs then the linker wants
// -arch_multiple and -final_output <final image name>. Unfortunately, this
// doesn't fit in cleanly because we have to pass this information down.
//
// FIXME: This is a hack; find a cleaner way to integrate this into the
// process.
const char *LinkingOutput = nullptr;
if (isa<LipoJobAction>(A)) {
if (FinalOutput)
LinkingOutput = FinalOutput->getValue();
else
LinkingOutput = getDefaultImageName();
}
BuildJobsForAction(C, A, &C.getDefaultToolChain(),
/*BoundArch*/ StringRef(),
/*AtTopLevel*/ true,
/*MultipleArchs*/ ArchNames.size() > 1,
[CUDA][OpenMP] Create generic offload action Summary: This patch replaces the CUDA specific action by a generic offload action. The offload action may have multiple dependences classier in “host” and “device”. The way this generic offloading action is used is very similar to what is done today by the CUDA implementation: it is used to set a specific toolchain and architecture to its dependences during the generation of jobs. This patch also proposes propagating the offloading information through the action graph so that that information can be easily retrieved at any time during the generation of commands. This allows e.g. the "clang tool” to evaluate whether CUDA should be supported for the device or host and ptas to easily retrieve the target architecture. This is an example of how the action graphs would look like (compilation of a single CUDA file with two GPU architectures) ``` 0: input, "cudatests.cu", cuda, (host-cuda) 1: preprocessor, {0}, cuda-cpp-output, (host-cuda) 2: compiler, {1}, ir, (host-cuda) 3: input, "cudatests.cu", cuda, (device-cuda, sm_35) 4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_35) 5: compiler, {4}, ir, (device-cuda, sm_35) 6: backend, {5}, assembler, (device-cuda, sm_35) 7: assembler, {6}, object, (device-cuda, sm_35) 8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {7}, object 9: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {6}, assembler 10: input, "cudatests.cu", cuda, (device-cuda, sm_37) 11: preprocessor, {10}, cuda-cpp-output, (device-cuda, sm_37) 12: compiler, {11}, ir, (device-cuda, sm_37) 13: backend, {12}, assembler, (device-cuda, sm_37) 14: assembler, {13}, object, (device-cuda, sm_37) 15: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {14}, object 16: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {13}, assembler 17: linker, {8, 9, 15, 16}, cuda-fatbin, (device-cuda) 18: offload, "host-cuda (powerpc64le-unknown-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda)" {17}, ir 19: backend, {18}, assembler 20: assembler, {19}, object 21: input, "cuda", object 22: input, "cudart", object 23: linker, {20, 21, 22}, image ``` The changes in this patch pass the existent regression tests (keeps the existent functionality) and resulting binaries execute correctly in a Power8+K40 machine. Reviewers: echristo, hfinkel, jlebar, ABataev, tra Subscribers: guansong, andreybokhanko, tcramer, mkuron, cfe-commits, arpith-jacob, carlo.bertolli, caomhin Differential Revision: https://reviews.llvm.org/D18171 llvm-svn: 275645
2016-07-16 07:13:27 +08:00
/*LinkingOutput*/ LinkingOutput, CachedResults,
/*TargetDeviceOffloadKind*/ Action::OFK_None);
}
// If the user passed -Qunused-arguments or there were errors, don't warn
// about any unused arguments.
if (Diags.hasErrorOccurred() ||
C.getArgs().hasArg(options::OPT_Qunused_arguments))
return;
// Claim -### here.
(void)C.getArgs().hasArg(options::OPT__HASH_HASH_HASH);
// Claim --driver-mode, --rsp-quoting, it was handled earlier.
(void)C.getArgs().hasArg(options::OPT_driver_mode);
(void)C.getArgs().hasArg(options::OPT_rsp_quoting);
for (Arg *A : C.getArgs()) {
// FIXME: It would be nice to be able to send the argument to the
// DiagnosticsEngine, so that extra values, position, and so on could be
// printed.
if (!A->isClaimed()) {
if (A->getOption().hasFlag(options::NoArgumentUnused))
continue;
// Suppress the warning automatically if this is just a flag, and it is an
// instance of an argument we already claimed.
const Option &Opt = A->getOption();
if (Opt.getKind() == Option::FlagClass) {
bool DuplicateClaimed = false;
for (const Arg *AA : C.getArgs().filtered(&Opt)) {
if (AA->isClaimed()) {
DuplicateClaimed = true;
break;
}
}
if (DuplicateClaimed)
continue;
}
// In clang-cl, don't mention unknown arguments here since they have
// already been warned about.
if (!IsCLMode() || !A->getOption().matches(options::OPT_UNKNOWN))
Diag(clang::diag::warn_drv_unused_argument)
<< A->getAsString(C.getArgs());
}
}
}
namespace {
/// Utility class to control the collapse of dependent actions and select the
/// tools accordingly.
class ToolSelector final {
/// The tool chain this selector refers to.
const ToolChain &TC;
/// The compilation this selector refers to.
const Compilation &C;
/// The base action this selector refers to.
const JobAction *BaseAction;
/// Set to true if the current toolchain refers to host actions.
bool IsHostSelector;
/// Set to true if save-temps and embed-bitcode functionalities are active.
bool SaveTemps;
bool EmbedBitcode;
/// Get previous dependent action or null if that does not exist. If
/// \a CanBeCollapsed is false, that action must be legal to collapse or
/// null will be returned.
const JobAction *getPrevDependentAction(const ActionList &Inputs,
ActionList &SavedOffloadAction,
bool CanBeCollapsed = true) {
// An option can be collapsed only if it has a single input.
if (Inputs.size() != 1)
return nullptr;
Action *CurAction = *Inputs.begin();
if (CanBeCollapsed &&
!CurAction->isCollapsingWithNextDependentActionLegal())
return nullptr;
// If the input action is an offload action. Look through it and save any
// offload action that can be dropped in the event of a collapse.
if (auto *OA = dyn_cast<OffloadAction>(CurAction)) {
// If the dependent action is a device action, we will attempt to collapse
// only with other device actions. Otherwise, we would do the same but
// with host actions only.
if (!IsHostSelector) {
if (OA->hasSingleDeviceDependence(/*DoNotConsiderHostActions=*/true)) {
CurAction =
OA->getSingleDeviceDependence(/*DoNotConsiderHostActions=*/true);
if (CanBeCollapsed &&
!CurAction->isCollapsingWithNextDependentActionLegal())
return nullptr;
SavedOffloadAction.push_back(OA);
return dyn_cast<JobAction>(CurAction);
}
} else if (OA->hasHostDependence()) {
CurAction = OA->getHostDependence();
if (CanBeCollapsed &&
!CurAction->isCollapsingWithNextDependentActionLegal())
return nullptr;
SavedOffloadAction.push_back(OA);
return dyn_cast<JobAction>(CurAction);
[CUDA][OpenMP] Create generic offload action Summary: This patch replaces the CUDA specific action by a generic offload action. The offload action may have multiple dependences classier in “host” and “device”. The way this generic offloading action is used is very similar to what is done today by the CUDA implementation: it is used to set a specific toolchain and architecture to its dependences during the generation of jobs. This patch also proposes propagating the offloading information through the action graph so that that information can be easily retrieved at any time during the generation of commands. This allows e.g. the "clang tool” to evaluate whether CUDA should be supported for the device or host and ptas to easily retrieve the target architecture. This is an example of how the action graphs would look like (compilation of a single CUDA file with two GPU architectures) ``` 0: input, "cudatests.cu", cuda, (host-cuda) 1: preprocessor, {0}, cuda-cpp-output, (host-cuda) 2: compiler, {1}, ir, (host-cuda) 3: input, "cudatests.cu", cuda, (device-cuda, sm_35) 4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_35) 5: compiler, {4}, ir, (device-cuda, sm_35) 6: backend, {5}, assembler, (device-cuda, sm_35) 7: assembler, {6}, object, (device-cuda, sm_35) 8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {7}, object 9: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {6}, assembler 10: input, "cudatests.cu", cuda, (device-cuda, sm_37) 11: preprocessor, {10}, cuda-cpp-output, (device-cuda, sm_37) 12: compiler, {11}, ir, (device-cuda, sm_37) 13: backend, {12}, assembler, (device-cuda, sm_37) 14: assembler, {13}, object, (device-cuda, sm_37) 15: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {14}, object 16: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {13}, assembler 17: linker, {8, 9, 15, 16}, cuda-fatbin, (device-cuda) 18: offload, "host-cuda (powerpc64le-unknown-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda)" {17}, ir 19: backend, {18}, assembler 20: assembler, {19}, object 21: input, "cuda", object 22: input, "cudart", object 23: linker, {20, 21, 22}, image ``` The changes in this patch pass the existent regression tests (keeps the existent functionality) and resulting binaries execute correctly in a Power8+K40 machine. Reviewers: echristo, hfinkel, jlebar, ABataev, tra Subscribers: guansong, andreybokhanko, tcramer, mkuron, cfe-commits, arpith-jacob, carlo.bertolli, caomhin Differential Revision: https://reviews.llvm.org/D18171 llvm-svn: 275645
2016-07-16 07:13:27 +08:00
}
return nullptr;
}
return dyn_cast<JobAction>(CurAction);
[CUDA][OpenMP] Create generic offload action Summary: This patch replaces the CUDA specific action by a generic offload action. The offload action may have multiple dependences classier in “host” and “device”. The way this generic offloading action is used is very similar to what is done today by the CUDA implementation: it is used to set a specific toolchain and architecture to its dependences during the generation of jobs. This patch also proposes propagating the offloading information through the action graph so that that information can be easily retrieved at any time during the generation of commands. This allows e.g. the "clang tool” to evaluate whether CUDA should be supported for the device or host and ptas to easily retrieve the target architecture. This is an example of how the action graphs would look like (compilation of a single CUDA file with two GPU architectures) ``` 0: input, "cudatests.cu", cuda, (host-cuda) 1: preprocessor, {0}, cuda-cpp-output, (host-cuda) 2: compiler, {1}, ir, (host-cuda) 3: input, "cudatests.cu", cuda, (device-cuda, sm_35) 4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_35) 5: compiler, {4}, ir, (device-cuda, sm_35) 6: backend, {5}, assembler, (device-cuda, sm_35) 7: assembler, {6}, object, (device-cuda, sm_35) 8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {7}, object 9: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {6}, assembler 10: input, "cudatests.cu", cuda, (device-cuda, sm_37) 11: preprocessor, {10}, cuda-cpp-output, (device-cuda, sm_37) 12: compiler, {11}, ir, (device-cuda, sm_37) 13: backend, {12}, assembler, (device-cuda, sm_37) 14: assembler, {13}, object, (device-cuda, sm_37) 15: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {14}, object 16: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {13}, assembler 17: linker, {8, 9, 15, 16}, cuda-fatbin, (device-cuda) 18: offload, "host-cuda (powerpc64le-unknown-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda)" {17}, ir 19: backend, {18}, assembler 20: assembler, {19}, object 21: input, "cuda", object 22: input, "cudart", object 23: linker, {20, 21, 22}, image ``` The changes in this patch pass the existent regression tests (keeps the existent functionality) and resulting binaries execute correctly in a Power8+K40 machine. Reviewers: echristo, hfinkel, jlebar, ABataev, tra Subscribers: guansong, andreybokhanko, tcramer, mkuron, cfe-commits, arpith-jacob, carlo.bertolli, caomhin Differential Revision: https://reviews.llvm.org/D18171 llvm-svn: 275645
2016-07-16 07:13:27 +08:00
}
/// Return true if an assemble action can be collapsed.
bool canCollapseAssembleAction() const {
return TC.useIntegratedAs() && !SaveTemps &&
!C.getArgs().hasArg(options::OPT_via_file_asm) &&
!C.getArgs().hasArg(options::OPT__SLASH_FA) &&
!C.getArgs().hasArg(options::OPT__SLASH_Fa);
}
/// Return true if a preprocessor action can be collapsed.
bool canCollapsePreprocessorAction() const {
return !C.getArgs().hasArg(options::OPT_no_integrated_cpp) &&
!C.getArgs().hasArg(options::OPT_traditional_cpp) && !SaveTemps &&
!C.getArgs().hasArg(options::OPT_rewrite_objc);
}
/// Struct that relates an action with the offload actions that would be
/// collapsed with it.
struct JobActionInfo final {
/// The action this info refers to.
const JobAction *JA = nullptr;
/// The offload actions we need to take care off if this action is
/// collapsed.
ActionList SavedOffloadAction;
};
/// Append collapsed offload actions from the give nnumber of elements in the
/// action info array.
static void AppendCollapsedOffloadAction(ActionList &CollapsedOffloadAction,
ArrayRef<JobActionInfo> &ActionInfo,
unsigned ElementNum) {
assert(ElementNum <= ActionInfo.size() && "Invalid number of elements.");
for (unsigned I = 0; I < ElementNum; ++I)
CollapsedOffloadAction.append(ActionInfo[I].SavedOffloadAction.begin(),
ActionInfo[I].SavedOffloadAction.end());
}
/// Functions that attempt to perform the combining. They detect if that is
/// legal, and if so they update the inputs \a Inputs and the offload action
/// that were collapsed in \a CollapsedOffloadAction. A tool that deals with
/// the combined action is returned. If the combining is not legal or if the
/// tool does not exist, null is returned.
/// Currently three kinds of collapsing are supported:
/// - Assemble + Backend + Compile;
/// - Assemble + Backend ;
/// - Backend + Compile.
const Tool *
combineAssembleBackendCompile(ArrayRef<JobActionInfo> ActionInfo,
const ActionList *&Inputs,
ActionList &CollapsedOffloadAction) {
if (ActionInfo.size() < 3 || !canCollapseAssembleAction())
return nullptr;
auto *AJ = dyn_cast<AssembleJobAction>(ActionInfo[0].JA);
auto *BJ = dyn_cast<BackendJobAction>(ActionInfo[1].JA);
auto *CJ = dyn_cast<CompileJobAction>(ActionInfo[2].JA);
if (!AJ || !BJ || !CJ)
return nullptr;
// Get compiler tool.
const Tool *T = TC.SelectTool(*CJ);
if (!T)
return nullptr;
// When using -fembed-bitcode, it is required to have the same tool (clang)
// for both CompilerJA and BackendJA. Otherwise, combine two stages.
if (EmbedBitcode) {
const Tool *BT = TC.SelectTool(*BJ);
if (BT == T)
return nullptr;
}
if (!T->hasIntegratedAssembler())
return nullptr;
Inputs = &CJ->getInputs();
AppendCollapsedOffloadAction(CollapsedOffloadAction, ActionInfo,
/*NumElements=*/3);
return T;
}
const Tool *combineAssembleBackend(ArrayRef<JobActionInfo> ActionInfo,
const ActionList *&Inputs,
ActionList &CollapsedOffloadAction) {
if (ActionInfo.size() < 2 || !canCollapseAssembleAction())
return nullptr;
auto *AJ = dyn_cast<AssembleJobAction>(ActionInfo[0].JA);
auto *BJ = dyn_cast<BackendJobAction>(ActionInfo[1].JA);
if (!AJ || !BJ)
return nullptr;
// Retrieve the compile job, backend action must always be preceded by one.
ActionList CompileJobOffloadActions;
auto *CJ = getPrevDependentAction(BJ->getInputs(), CompileJobOffloadActions,
/*CanBeCollapsed=*/false);
if (!AJ || !BJ || !CJ)
return nullptr;
[CUDA][OpenMP] Create generic offload action Summary: This patch replaces the CUDA specific action by a generic offload action. The offload action may have multiple dependences classier in “host” and “device”. The way this generic offloading action is used is very similar to what is done today by the CUDA implementation: it is used to set a specific toolchain and architecture to its dependences during the generation of jobs. This patch also proposes propagating the offloading information through the action graph so that that information can be easily retrieved at any time during the generation of commands. This allows e.g. the "clang tool” to evaluate whether CUDA should be supported for the device or host and ptas to easily retrieve the target architecture. This is an example of how the action graphs would look like (compilation of a single CUDA file with two GPU architectures) ``` 0: input, "cudatests.cu", cuda, (host-cuda) 1: preprocessor, {0}, cuda-cpp-output, (host-cuda) 2: compiler, {1}, ir, (host-cuda) 3: input, "cudatests.cu", cuda, (device-cuda, sm_35) 4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_35) 5: compiler, {4}, ir, (device-cuda, sm_35) 6: backend, {5}, assembler, (device-cuda, sm_35) 7: assembler, {6}, object, (device-cuda, sm_35) 8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {7}, object 9: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {6}, assembler 10: input, "cudatests.cu", cuda, (device-cuda, sm_37) 11: preprocessor, {10}, cuda-cpp-output, (device-cuda, sm_37) 12: compiler, {11}, ir, (device-cuda, sm_37) 13: backend, {12}, assembler, (device-cuda, sm_37) 14: assembler, {13}, object, (device-cuda, sm_37) 15: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {14}, object 16: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {13}, assembler 17: linker, {8, 9, 15, 16}, cuda-fatbin, (device-cuda) 18: offload, "host-cuda (powerpc64le-unknown-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda)" {17}, ir 19: backend, {18}, assembler 20: assembler, {19}, object 21: input, "cuda", object 22: input, "cudart", object 23: linker, {20, 21, 22}, image ``` The changes in this patch pass the existent regression tests (keeps the existent functionality) and resulting binaries execute correctly in a Power8+K40 machine. Reviewers: echristo, hfinkel, jlebar, ABataev, tra Subscribers: guansong, andreybokhanko, tcramer, mkuron, cfe-commits, arpith-jacob, carlo.bertolli, caomhin Differential Revision: https://reviews.llvm.org/D18171 llvm-svn: 275645
2016-07-16 07:13:27 +08:00
assert(isa<CompileJobAction>(CJ) &&
"Expecting compile job preceding backend job.");
[CUDA][OpenMP] Create generic offload action Summary: This patch replaces the CUDA specific action by a generic offload action. The offload action may have multiple dependences classier in “host” and “device”. The way this generic offloading action is used is very similar to what is done today by the CUDA implementation: it is used to set a specific toolchain and architecture to its dependences during the generation of jobs. This patch also proposes propagating the offloading information through the action graph so that that information can be easily retrieved at any time during the generation of commands. This allows e.g. the "clang tool” to evaluate whether CUDA should be supported for the device or host and ptas to easily retrieve the target architecture. This is an example of how the action graphs would look like (compilation of a single CUDA file with two GPU architectures) ``` 0: input, "cudatests.cu", cuda, (host-cuda) 1: preprocessor, {0}, cuda-cpp-output, (host-cuda) 2: compiler, {1}, ir, (host-cuda) 3: input, "cudatests.cu", cuda, (device-cuda, sm_35) 4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_35) 5: compiler, {4}, ir, (device-cuda, sm_35) 6: backend, {5}, assembler, (device-cuda, sm_35) 7: assembler, {6}, object, (device-cuda, sm_35) 8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {7}, object 9: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {6}, assembler 10: input, "cudatests.cu", cuda, (device-cuda, sm_37) 11: preprocessor, {10}, cuda-cpp-output, (device-cuda, sm_37) 12: compiler, {11}, ir, (device-cuda, sm_37) 13: backend, {12}, assembler, (device-cuda, sm_37) 14: assembler, {13}, object, (device-cuda, sm_37) 15: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {14}, object 16: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {13}, assembler 17: linker, {8, 9, 15, 16}, cuda-fatbin, (device-cuda) 18: offload, "host-cuda (powerpc64le-unknown-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda)" {17}, ir 19: backend, {18}, assembler 20: assembler, {19}, object 21: input, "cuda", object 22: input, "cudart", object 23: linker, {20, 21, 22}, image ``` The changes in this patch pass the existent regression tests (keeps the existent functionality) and resulting binaries execute correctly in a Power8+K40 machine. Reviewers: echristo, hfinkel, jlebar, ABataev, tra Subscribers: guansong, andreybokhanko, tcramer, mkuron, cfe-commits, arpith-jacob, carlo.bertolli, caomhin Differential Revision: https://reviews.llvm.org/D18171 llvm-svn: 275645
2016-07-16 07:13:27 +08:00
// Get compiler tool.
const Tool *T = TC.SelectTool(*CJ);
if (!T)
return nullptr;
[CUDA][OpenMP] Create generic offload action Summary: This patch replaces the CUDA specific action by a generic offload action. The offload action may have multiple dependences classier in “host” and “device”. The way this generic offloading action is used is very similar to what is done today by the CUDA implementation: it is used to set a specific toolchain and architecture to its dependences during the generation of jobs. This patch also proposes propagating the offloading information through the action graph so that that information can be easily retrieved at any time during the generation of commands. This allows e.g. the "clang tool” to evaluate whether CUDA should be supported for the device or host and ptas to easily retrieve the target architecture. This is an example of how the action graphs would look like (compilation of a single CUDA file with two GPU architectures) ``` 0: input, "cudatests.cu", cuda, (host-cuda) 1: preprocessor, {0}, cuda-cpp-output, (host-cuda) 2: compiler, {1}, ir, (host-cuda) 3: input, "cudatests.cu", cuda, (device-cuda, sm_35) 4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_35) 5: compiler, {4}, ir, (device-cuda, sm_35) 6: backend, {5}, assembler, (device-cuda, sm_35) 7: assembler, {6}, object, (device-cuda, sm_35) 8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {7}, object 9: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {6}, assembler 10: input, "cudatests.cu", cuda, (device-cuda, sm_37) 11: preprocessor, {10}, cuda-cpp-output, (device-cuda, sm_37) 12: compiler, {11}, ir, (device-cuda, sm_37) 13: backend, {12}, assembler, (device-cuda, sm_37) 14: assembler, {13}, object, (device-cuda, sm_37) 15: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {14}, object 16: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {13}, assembler 17: linker, {8, 9, 15, 16}, cuda-fatbin, (device-cuda) 18: offload, "host-cuda (powerpc64le-unknown-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda)" {17}, ir 19: backend, {18}, assembler 20: assembler, {19}, object 21: input, "cuda", object 22: input, "cudart", object 23: linker, {20, 21, 22}, image ``` The changes in this patch pass the existent regression tests (keeps the existent functionality) and resulting binaries execute correctly in a Power8+K40 machine. Reviewers: echristo, hfinkel, jlebar, ABataev, tra Subscribers: guansong, andreybokhanko, tcramer, mkuron, cfe-commits, arpith-jacob, carlo.bertolli, caomhin Differential Revision: https://reviews.llvm.org/D18171 llvm-svn: 275645
2016-07-16 07:13:27 +08:00
if (!T->hasIntegratedAssembler())
return nullptr;
Inputs = &BJ->getInputs();
AppendCollapsedOffloadAction(CollapsedOffloadAction, ActionInfo,
/*NumElements=*/2);
return T;
}
const Tool *combineBackendCompile(ArrayRef<JobActionInfo> ActionInfo,
const ActionList *&Inputs,
ActionList &CollapsedOffloadAction) {
if (ActionInfo.size() < 2 || !canCollapsePreprocessorAction())
return nullptr;
auto *BJ = dyn_cast<BackendJobAction>(ActionInfo[0].JA);
auto *CJ = dyn_cast<CompileJobAction>(ActionInfo[1].JA);
if (!BJ || !CJ)
return nullptr;
// Get compiler tool.
const Tool *T = TC.SelectTool(*CJ);
if (!T)
return nullptr;
if (T->canEmitIR() && (SaveTemps || EmbedBitcode))
return nullptr;
Inputs = &CJ->getInputs();
AppendCollapsedOffloadAction(CollapsedOffloadAction, ActionInfo,
/*NumElements=*/2);
return T;
}
[CUDA][OpenMP] Create generic offload action Summary: This patch replaces the CUDA specific action by a generic offload action. The offload action may have multiple dependences classier in “host” and “device”. The way this generic offloading action is used is very similar to what is done today by the CUDA implementation: it is used to set a specific toolchain and architecture to its dependences during the generation of jobs. This patch also proposes propagating the offloading information through the action graph so that that information can be easily retrieved at any time during the generation of commands. This allows e.g. the "clang tool” to evaluate whether CUDA should be supported for the device or host and ptas to easily retrieve the target architecture. This is an example of how the action graphs would look like (compilation of a single CUDA file with two GPU architectures) ``` 0: input, "cudatests.cu", cuda, (host-cuda) 1: preprocessor, {0}, cuda-cpp-output, (host-cuda) 2: compiler, {1}, ir, (host-cuda) 3: input, "cudatests.cu", cuda, (device-cuda, sm_35) 4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_35) 5: compiler, {4}, ir, (device-cuda, sm_35) 6: backend, {5}, assembler, (device-cuda, sm_35) 7: assembler, {6}, object, (device-cuda, sm_35) 8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {7}, object 9: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {6}, assembler 10: input, "cudatests.cu", cuda, (device-cuda, sm_37) 11: preprocessor, {10}, cuda-cpp-output, (device-cuda, sm_37) 12: compiler, {11}, ir, (device-cuda, sm_37) 13: backend, {12}, assembler, (device-cuda, sm_37) 14: assembler, {13}, object, (device-cuda, sm_37) 15: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {14}, object 16: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {13}, assembler 17: linker, {8, 9, 15, 16}, cuda-fatbin, (device-cuda) 18: offload, "host-cuda (powerpc64le-unknown-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda)" {17}, ir 19: backend, {18}, assembler 20: assembler, {19}, object 21: input, "cuda", object 22: input, "cudart", object 23: linker, {20, 21, 22}, image ``` The changes in this patch pass the existent regression tests (keeps the existent functionality) and resulting binaries execute correctly in a Power8+K40 machine. Reviewers: echristo, hfinkel, jlebar, ABataev, tra Subscribers: guansong, andreybokhanko, tcramer, mkuron, cfe-commits, arpith-jacob, carlo.bertolli, caomhin Differential Revision: https://reviews.llvm.org/D18171 llvm-svn: 275645
2016-07-16 07:13:27 +08:00
/// Updates the inputs if the obtained tool supports combining with
/// preprocessor action, and the current input is indeed a preprocessor
/// action. If combining results in the collapse of offloading actions, those
/// are appended to \a CollapsedOffloadAction.
void combineWithPreprocessor(const Tool *T, const ActionList *&Inputs,
ActionList &CollapsedOffloadAction) {
if (!T || !canCollapsePreprocessorAction() || !T->hasIntegratedCPP())
return;
[CUDA][OpenMP] Create generic offload action Summary: This patch replaces the CUDA specific action by a generic offload action. The offload action may have multiple dependences classier in “host” and “device”. The way this generic offloading action is used is very similar to what is done today by the CUDA implementation: it is used to set a specific toolchain and architecture to its dependences during the generation of jobs. This patch also proposes propagating the offloading information through the action graph so that that information can be easily retrieved at any time during the generation of commands. This allows e.g. the "clang tool” to evaluate whether CUDA should be supported for the device or host and ptas to easily retrieve the target architecture. This is an example of how the action graphs would look like (compilation of a single CUDA file with two GPU architectures) ``` 0: input, "cudatests.cu", cuda, (host-cuda) 1: preprocessor, {0}, cuda-cpp-output, (host-cuda) 2: compiler, {1}, ir, (host-cuda) 3: input, "cudatests.cu", cuda, (device-cuda, sm_35) 4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_35) 5: compiler, {4}, ir, (device-cuda, sm_35) 6: backend, {5}, assembler, (device-cuda, sm_35) 7: assembler, {6}, object, (device-cuda, sm_35) 8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {7}, object 9: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {6}, assembler 10: input, "cudatests.cu", cuda, (device-cuda, sm_37) 11: preprocessor, {10}, cuda-cpp-output, (device-cuda, sm_37) 12: compiler, {11}, ir, (device-cuda, sm_37) 13: backend, {12}, assembler, (device-cuda, sm_37) 14: assembler, {13}, object, (device-cuda, sm_37) 15: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {14}, object 16: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {13}, assembler 17: linker, {8, 9, 15, 16}, cuda-fatbin, (device-cuda) 18: offload, "host-cuda (powerpc64le-unknown-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda)" {17}, ir 19: backend, {18}, assembler 20: assembler, {19}, object 21: input, "cuda", object 22: input, "cudart", object 23: linker, {20, 21, 22}, image ``` The changes in this patch pass the existent regression tests (keeps the existent functionality) and resulting binaries execute correctly in a Power8+K40 machine. Reviewers: echristo, hfinkel, jlebar, ABataev, tra Subscribers: guansong, andreybokhanko, tcramer, mkuron, cfe-commits, arpith-jacob, carlo.bertolli, caomhin Differential Revision: https://reviews.llvm.org/D18171 llvm-svn: 275645
2016-07-16 07:13:27 +08:00
// Attempt to get a preprocessor action dependence.
ActionList PreprocessJobOffloadActions;
auto *PJ = getPrevDependentAction(*Inputs, PreprocessJobOffloadActions);
if (!PJ || !isa<PreprocessJobAction>(PJ))
return;
// This is legal to combine. Append any offload action we found and set the
// current inputs to preprocessor inputs.
CollapsedOffloadAction.append(PreprocessJobOffloadActions.begin(),
PreprocessJobOffloadActions.end());
Inputs = &PJ->getInputs();
[CUDA][OpenMP] Create generic offload action Summary: This patch replaces the CUDA specific action by a generic offload action. The offload action may have multiple dependences classier in “host” and “device”. The way this generic offloading action is used is very similar to what is done today by the CUDA implementation: it is used to set a specific toolchain and architecture to its dependences during the generation of jobs. This patch also proposes propagating the offloading information through the action graph so that that information can be easily retrieved at any time during the generation of commands. This allows e.g. the "clang tool” to evaluate whether CUDA should be supported for the device or host and ptas to easily retrieve the target architecture. This is an example of how the action graphs would look like (compilation of a single CUDA file with two GPU architectures) ``` 0: input, "cudatests.cu", cuda, (host-cuda) 1: preprocessor, {0}, cuda-cpp-output, (host-cuda) 2: compiler, {1}, ir, (host-cuda) 3: input, "cudatests.cu", cuda, (device-cuda, sm_35) 4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_35) 5: compiler, {4}, ir, (device-cuda, sm_35) 6: backend, {5}, assembler, (device-cuda, sm_35) 7: assembler, {6}, object, (device-cuda, sm_35) 8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {7}, object 9: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {6}, assembler 10: input, "cudatests.cu", cuda, (device-cuda, sm_37) 11: preprocessor, {10}, cuda-cpp-output, (device-cuda, sm_37) 12: compiler, {11}, ir, (device-cuda, sm_37) 13: backend, {12}, assembler, (device-cuda, sm_37) 14: assembler, {13}, object, (device-cuda, sm_37) 15: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {14}, object 16: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {13}, assembler 17: linker, {8, 9, 15, 16}, cuda-fatbin, (device-cuda) 18: offload, "host-cuda (powerpc64le-unknown-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda)" {17}, ir 19: backend, {18}, assembler 20: assembler, {19}, object 21: input, "cuda", object 22: input, "cudart", object 23: linker, {20, 21, 22}, image ``` The changes in this patch pass the existent regression tests (keeps the existent functionality) and resulting binaries execute correctly in a Power8+K40 machine. Reviewers: echristo, hfinkel, jlebar, ABataev, tra Subscribers: guansong, andreybokhanko, tcramer, mkuron, cfe-commits, arpith-jacob, carlo.bertolli, caomhin Differential Revision: https://reviews.llvm.org/D18171 llvm-svn: 275645
2016-07-16 07:13:27 +08:00
}
public:
ToolSelector(const JobAction *BaseAction, const ToolChain &TC,
const Compilation &C, bool SaveTemps, bool EmbedBitcode)
: TC(TC), C(C), BaseAction(BaseAction), SaveTemps(SaveTemps),
EmbedBitcode(EmbedBitcode) {
assert(BaseAction && "Invalid base action.");
IsHostSelector = BaseAction->getOffloadingDeviceKind() == Action::OFK_None;
}
/// Check if a chain of actions can be combined and return the tool that can
/// handle the combination of actions. The pointer to the current inputs \a
/// Inputs and the list of offload actions \a CollapsedOffloadActions
/// connected to collapsed actions are updated accordingly. The latter enables
/// the caller of the selector to process them afterwards instead of just
/// dropping them. If no suitable tool is found, null will be returned.
const Tool *getTool(const ActionList *&Inputs,
ActionList &CollapsedOffloadAction) {
//
// Get the largest chain of actions that we could combine.
//
SmallVector<JobActionInfo, 5> ActionChain(1);
ActionChain.back().JA = BaseAction;
while (ActionChain.back().JA) {
const Action *CurAction = ActionChain.back().JA;
// Grow the chain by one element.
ActionChain.resize(ActionChain.size() + 1);
JobActionInfo &AI = ActionChain.back();
// Attempt to fill it with the
AI.JA =
getPrevDependentAction(CurAction->getInputs(), AI.SavedOffloadAction);
}
// Pop the last action info as it could not be filled.
ActionChain.pop_back();
//
// Attempt to combine actions. If all combining attempts failed, just return
// the tool of the provided action. At the end we attempt to combine the
// action with any preprocessor action it may depend on.
//
const Tool *T = combineAssembleBackendCompile(ActionChain, Inputs,
CollapsedOffloadAction);
if (!T)
T = combineAssembleBackend(ActionChain, Inputs, CollapsedOffloadAction);
if (!T)
T = combineBackendCompile(ActionChain, Inputs, CollapsedOffloadAction);
if (!T) {
Inputs = &BaseAction->getInputs();
T = TC.SelectTool(*BaseAction);
}
combineWithPreprocessor(T, Inputs, CollapsedOffloadAction);
return T;
}
};
}
/// Return a string that uniquely identifies the result of a job. The bound arch
/// is not necessarily represented in the toolchain's triple -- for example,
/// armv7 and armv7s both map to the same triple -- so we need both in our map.
/// Also, we need to add the offloading device kind, as the same tool chain can
/// be used for host and device for some programming models, e.g. OpenMP.
static std::string GetTriplePlusArchString(const ToolChain *TC,
StringRef BoundArch,
Action::OffloadKind OffloadKind) {
std::string TriplePlusArch = TC->getTriple().normalize();
if (!BoundArch.empty()) {
TriplePlusArch += "-";
TriplePlusArch += BoundArch;
}
TriplePlusArch += "-";
TriplePlusArch += Action::GetOffloadKindName(OffloadKind);
return TriplePlusArch;
}
InputInfo Driver::BuildJobsForAction(
Compilation &C, const Action *A, const ToolChain *TC, StringRef BoundArch,
bool AtTopLevel, bool MultipleArchs, const char *LinkingOutput,
std::map<std::pair<const Action *, std::string>, InputInfo> &CachedResults,
Action::OffloadKind TargetDeviceOffloadKind) const {
std::pair<const Action *, std::string> ActionTC = {
A, GetTriplePlusArchString(TC, BoundArch, TargetDeviceOffloadKind)};
auto CachedResult = CachedResults.find(ActionTC);
if (CachedResult != CachedResults.end()) {
return CachedResult->second;
}
[CUDA][OpenMP] Create generic offload action Summary: This patch replaces the CUDA specific action by a generic offload action. The offload action may have multiple dependences classier in “host” and “device”. The way this generic offloading action is used is very similar to what is done today by the CUDA implementation: it is used to set a specific toolchain and architecture to its dependences during the generation of jobs. This patch also proposes propagating the offloading information through the action graph so that that information can be easily retrieved at any time during the generation of commands. This allows e.g. the "clang tool” to evaluate whether CUDA should be supported for the device or host and ptas to easily retrieve the target architecture. This is an example of how the action graphs would look like (compilation of a single CUDA file with two GPU architectures) ``` 0: input, "cudatests.cu", cuda, (host-cuda) 1: preprocessor, {0}, cuda-cpp-output, (host-cuda) 2: compiler, {1}, ir, (host-cuda) 3: input, "cudatests.cu", cuda, (device-cuda, sm_35) 4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_35) 5: compiler, {4}, ir, (device-cuda, sm_35) 6: backend, {5}, assembler, (device-cuda, sm_35) 7: assembler, {6}, object, (device-cuda, sm_35) 8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {7}, object 9: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {6}, assembler 10: input, "cudatests.cu", cuda, (device-cuda, sm_37) 11: preprocessor, {10}, cuda-cpp-output, (device-cuda, sm_37) 12: compiler, {11}, ir, (device-cuda, sm_37) 13: backend, {12}, assembler, (device-cuda, sm_37) 14: assembler, {13}, object, (device-cuda, sm_37) 15: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {14}, object 16: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {13}, assembler 17: linker, {8, 9, 15, 16}, cuda-fatbin, (device-cuda) 18: offload, "host-cuda (powerpc64le-unknown-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda)" {17}, ir 19: backend, {18}, assembler 20: assembler, {19}, object 21: input, "cuda", object 22: input, "cudart", object 23: linker, {20, 21, 22}, image ``` The changes in this patch pass the existent regression tests (keeps the existent functionality) and resulting binaries execute correctly in a Power8+K40 machine. Reviewers: echristo, hfinkel, jlebar, ABataev, tra Subscribers: guansong, andreybokhanko, tcramer, mkuron, cfe-commits, arpith-jacob, carlo.bertolli, caomhin Differential Revision: https://reviews.llvm.org/D18171 llvm-svn: 275645
2016-07-16 07:13:27 +08:00
InputInfo Result = BuildJobsForActionNoCache(
C, A, TC, BoundArch, AtTopLevel, MultipleArchs, LinkingOutput,
CachedResults, TargetDeviceOffloadKind);
CachedResults[ActionTC] = Result;
return Result;
}
InputInfo Driver::BuildJobsForActionNoCache(
Compilation &C, const Action *A, const ToolChain *TC, StringRef BoundArch,
bool AtTopLevel, bool MultipleArchs, const char *LinkingOutput,
[CUDA][OpenMP] Create generic offload action Summary: This patch replaces the CUDA specific action by a generic offload action. The offload action may have multiple dependences classier in “host” and “device”. The way this generic offloading action is used is very similar to what is done today by the CUDA implementation: it is used to set a specific toolchain and architecture to its dependences during the generation of jobs. This patch also proposes propagating the offloading information through the action graph so that that information can be easily retrieved at any time during the generation of commands. This allows e.g. the "clang tool” to evaluate whether CUDA should be supported for the device or host and ptas to easily retrieve the target architecture. This is an example of how the action graphs would look like (compilation of a single CUDA file with two GPU architectures) ``` 0: input, "cudatests.cu", cuda, (host-cuda) 1: preprocessor, {0}, cuda-cpp-output, (host-cuda) 2: compiler, {1}, ir, (host-cuda) 3: input, "cudatests.cu", cuda, (device-cuda, sm_35) 4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_35) 5: compiler, {4}, ir, (device-cuda, sm_35) 6: backend, {5}, assembler, (device-cuda, sm_35) 7: assembler, {6}, object, (device-cuda, sm_35) 8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {7}, object 9: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {6}, assembler 10: input, "cudatests.cu", cuda, (device-cuda, sm_37) 11: preprocessor, {10}, cuda-cpp-output, (device-cuda, sm_37) 12: compiler, {11}, ir, (device-cuda, sm_37) 13: backend, {12}, assembler, (device-cuda, sm_37) 14: assembler, {13}, object, (device-cuda, sm_37) 15: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {14}, object 16: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {13}, assembler 17: linker, {8, 9, 15, 16}, cuda-fatbin, (device-cuda) 18: offload, "host-cuda (powerpc64le-unknown-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda)" {17}, ir 19: backend, {18}, assembler 20: assembler, {19}, object 21: input, "cuda", object 22: input, "cudart", object 23: linker, {20, 21, 22}, image ``` The changes in this patch pass the existent regression tests (keeps the existent functionality) and resulting binaries execute correctly in a Power8+K40 machine. Reviewers: echristo, hfinkel, jlebar, ABataev, tra Subscribers: guansong, andreybokhanko, tcramer, mkuron, cfe-commits, arpith-jacob, carlo.bertolli, caomhin Differential Revision: https://reviews.llvm.org/D18171 llvm-svn: 275645
2016-07-16 07:13:27 +08:00
std::map<std::pair<const Action *, std::string>, InputInfo> &CachedResults,
Action::OffloadKind TargetDeviceOffloadKind) const {
llvm::PrettyStackTraceString CrashInfo("Building compilation jobs");
[CUDA][OpenMP] Create generic offload action Summary: This patch replaces the CUDA specific action by a generic offload action. The offload action may have multiple dependences classier in “host” and “device”. The way this generic offloading action is used is very similar to what is done today by the CUDA implementation: it is used to set a specific toolchain and architecture to its dependences during the generation of jobs. This patch also proposes propagating the offloading information through the action graph so that that information can be easily retrieved at any time during the generation of commands. This allows e.g. the "clang tool” to evaluate whether CUDA should be supported for the device or host and ptas to easily retrieve the target architecture. This is an example of how the action graphs would look like (compilation of a single CUDA file with two GPU architectures) ``` 0: input, "cudatests.cu", cuda, (host-cuda) 1: preprocessor, {0}, cuda-cpp-output, (host-cuda) 2: compiler, {1}, ir, (host-cuda) 3: input, "cudatests.cu", cuda, (device-cuda, sm_35) 4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_35) 5: compiler, {4}, ir, (device-cuda, sm_35) 6: backend, {5}, assembler, (device-cuda, sm_35) 7: assembler, {6}, object, (device-cuda, sm_35) 8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {7}, object 9: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {6}, assembler 10: input, "cudatests.cu", cuda, (device-cuda, sm_37) 11: preprocessor, {10}, cuda-cpp-output, (device-cuda, sm_37) 12: compiler, {11}, ir, (device-cuda, sm_37) 13: backend, {12}, assembler, (device-cuda, sm_37) 14: assembler, {13}, object, (device-cuda, sm_37) 15: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {14}, object 16: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {13}, assembler 17: linker, {8, 9, 15, 16}, cuda-fatbin, (device-cuda) 18: offload, "host-cuda (powerpc64le-unknown-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda)" {17}, ir 19: backend, {18}, assembler 20: assembler, {19}, object 21: input, "cuda", object 22: input, "cudart", object 23: linker, {20, 21, 22}, image ``` The changes in this patch pass the existent regression tests (keeps the existent functionality) and resulting binaries execute correctly in a Power8+K40 machine. Reviewers: echristo, hfinkel, jlebar, ABataev, tra Subscribers: guansong, andreybokhanko, tcramer, mkuron, cfe-commits, arpith-jacob, carlo.bertolli, caomhin Differential Revision: https://reviews.llvm.org/D18171 llvm-svn: 275645
2016-07-16 07:13:27 +08:00
InputInfoList OffloadDependencesInputInfo;
bool BuildingForOffloadDevice = TargetDeviceOffloadKind != Action::OFK_None;
[CUDA][OpenMP] Create generic offload action Summary: This patch replaces the CUDA specific action by a generic offload action. The offload action may have multiple dependences classier in “host” and “device”. The way this generic offloading action is used is very similar to what is done today by the CUDA implementation: it is used to set a specific toolchain and architecture to its dependences during the generation of jobs. This patch also proposes propagating the offloading information through the action graph so that that information can be easily retrieved at any time during the generation of commands. This allows e.g. the "clang tool” to evaluate whether CUDA should be supported for the device or host and ptas to easily retrieve the target architecture. This is an example of how the action graphs would look like (compilation of a single CUDA file with two GPU architectures) ``` 0: input, "cudatests.cu", cuda, (host-cuda) 1: preprocessor, {0}, cuda-cpp-output, (host-cuda) 2: compiler, {1}, ir, (host-cuda) 3: input, "cudatests.cu", cuda, (device-cuda, sm_35) 4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_35) 5: compiler, {4}, ir, (device-cuda, sm_35) 6: backend, {5}, assembler, (device-cuda, sm_35) 7: assembler, {6}, object, (device-cuda, sm_35) 8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {7}, object 9: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {6}, assembler 10: input, "cudatests.cu", cuda, (device-cuda, sm_37) 11: preprocessor, {10}, cuda-cpp-output, (device-cuda, sm_37) 12: compiler, {11}, ir, (device-cuda, sm_37) 13: backend, {12}, assembler, (device-cuda, sm_37) 14: assembler, {13}, object, (device-cuda, sm_37) 15: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {14}, object 16: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {13}, assembler 17: linker, {8, 9, 15, 16}, cuda-fatbin, (device-cuda) 18: offload, "host-cuda (powerpc64le-unknown-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda)" {17}, ir 19: backend, {18}, assembler 20: assembler, {19}, object 21: input, "cuda", object 22: input, "cudart", object 23: linker, {20, 21, 22}, image ``` The changes in this patch pass the existent regression tests (keeps the existent functionality) and resulting binaries execute correctly in a Power8+K40 machine. Reviewers: echristo, hfinkel, jlebar, ABataev, tra Subscribers: guansong, andreybokhanko, tcramer, mkuron, cfe-commits, arpith-jacob, carlo.bertolli, caomhin Differential Revision: https://reviews.llvm.org/D18171 llvm-svn: 275645
2016-07-16 07:13:27 +08:00
if (const OffloadAction *OA = dyn_cast<OffloadAction>(A)) {
// The offload action is expected to be used in four different situations.
//
// a) Set a toolchain/architecture/kind for a host action:
// Host Action 1 -> OffloadAction -> Host Action 2
//
// b) Set a toolchain/architecture/kind for a device action;
// Device Action 1 -> OffloadAction -> Device Action 2
//
// c) Specify a device dependence to a host action;
[CUDA][OpenMP] Create generic offload action Summary: This patch replaces the CUDA specific action by a generic offload action. The offload action may have multiple dependences classier in “host” and “device”. The way this generic offloading action is used is very similar to what is done today by the CUDA implementation: it is used to set a specific toolchain and architecture to its dependences during the generation of jobs. This patch also proposes propagating the offloading information through the action graph so that that information can be easily retrieved at any time during the generation of commands. This allows e.g. the "clang tool” to evaluate whether CUDA should be supported for the device or host and ptas to easily retrieve the target architecture. This is an example of how the action graphs would look like (compilation of a single CUDA file with two GPU architectures) ``` 0: input, "cudatests.cu", cuda, (host-cuda) 1: preprocessor, {0}, cuda-cpp-output, (host-cuda) 2: compiler, {1}, ir, (host-cuda) 3: input, "cudatests.cu", cuda, (device-cuda, sm_35) 4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_35) 5: compiler, {4}, ir, (device-cuda, sm_35) 6: backend, {5}, assembler, (device-cuda, sm_35) 7: assembler, {6}, object, (device-cuda, sm_35) 8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {7}, object 9: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {6}, assembler 10: input, "cudatests.cu", cuda, (device-cuda, sm_37) 11: preprocessor, {10}, cuda-cpp-output, (device-cuda, sm_37) 12: compiler, {11}, ir, (device-cuda, sm_37) 13: backend, {12}, assembler, (device-cuda, sm_37) 14: assembler, {13}, object, (device-cuda, sm_37) 15: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {14}, object 16: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {13}, assembler 17: linker, {8, 9, 15, 16}, cuda-fatbin, (device-cuda) 18: offload, "host-cuda (powerpc64le-unknown-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda)" {17}, ir 19: backend, {18}, assembler 20: assembler, {19}, object 21: input, "cuda", object 22: input, "cudart", object 23: linker, {20, 21, 22}, image ``` The changes in this patch pass the existent regression tests (keeps the existent functionality) and resulting binaries execute correctly in a Power8+K40 machine. Reviewers: echristo, hfinkel, jlebar, ABataev, tra Subscribers: guansong, andreybokhanko, tcramer, mkuron, cfe-commits, arpith-jacob, carlo.bertolli, caomhin Differential Revision: https://reviews.llvm.org/D18171 llvm-svn: 275645
2016-07-16 07:13:27 +08:00
// Device Action 1 _
// \
// Host Action 1 ---> OffloadAction -> Host Action 2
//
// d) Specify a host dependence to a device action.
// Host Action 1 _
// \
// Device Action 1 ---> OffloadAction -> Device Action 2
//
// For a) and b), we just return the job generated for the dependence. For
// c) and d) we override the current action with the host/device dependence
// if the current toolchain is host/device and set the offload dependences
// info with the jobs obtained from the device/host dependence(s).
// If there is a single device option, just generate the job for it.
if (OA->hasSingleDeviceDependence()) {
InputInfo DevA;
OA->doOnEachDeviceDependence([&](Action *DepA, const ToolChain *DepTC,
const char *DepBoundArch) {
DevA =
BuildJobsForAction(C, DepA, DepTC, DepBoundArch, AtTopLevel,
/*MultipleArchs*/ !!DepBoundArch, LinkingOutput,
CachedResults, DepA->getOffloadingDeviceKind());
[CUDA][OpenMP] Create generic offload action Summary: This patch replaces the CUDA specific action by a generic offload action. The offload action may have multiple dependences classier in “host” and “device”. The way this generic offloading action is used is very similar to what is done today by the CUDA implementation: it is used to set a specific toolchain and architecture to its dependences during the generation of jobs. This patch also proposes propagating the offloading information through the action graph so that that information can be easily retrieved at any time during the generation of commands. This allows e.g. the "clang tool” to evaluate whether CUDA should be supported for the device or host and ptas to easily retrieve the target architecture. This is an example of how the action graphs would look like (compilation of a single CUDA file with two GPU architectures) ``` 0: input, "cudatests.cu", cuda, (host-cuda) 1: preprocessor, {0}, cuda-cpp-output, (host-cuda) 2: compiler, {1}, ir, (host-cuda) 3: input, "cudatests.cu", cuda, (device-cuda, sm_35) 4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_35) 5: compiler, {4}, ir, (device-cuda, sm_35) 6: backend, {5}, assembler, (device-cuda, sm_35) 7: assembler, {6}, object, (device-cuda, sm_35) 8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {7}, object 9: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {6}, assembler 10: input, "cudatests.cu", cuda, (device-cuda, sm_37) 11: preprocessor, {10}, cuda-cpp-output, (device-cuda, sm_37) 12: compiler, {11}, ir, (device-cuda, sm_37) 13: backend, {12}, assembler, (device-cuda, sm_37) 14: assembler, {13}, object, (device-cuda, sm_37) 15: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {14}, object 16: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {13}, assembler 17: linker, {8, 9, 15, 16}, cuda-fatbin, (device-cuda) 18: offload, "host-cuda (powerpc64le-unknown-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda)" {17}, ir 19: backend, {18}, assembler 20: assembler, {19}, object 21: input, "cuda", object 22: input, "cudart", object 23: linker, {20, 21, 22}, image ``` The changes in this patch pass the existent regression tests (keeps the existent functionality) and resulting binaries execute correctly in a Power8+K40 machine. Reviewers: echristo, hfinkel, jlebar, ABataev, tra Subscribers: guansong, andreybokhanko, tcramer, mkuron, cfe-commits, arpith-jacob, carlo.bertolli, caomhin Differential Revision: https://reviews.llvm.org/D18171 llvm-svn: 275645
2016-07-16 07:13:27 +08:00
});
return DevA;
}
[CUDA][OpenMP] Create generic offload action Summary: This patch replaces the CUDA specific action by a generic offload action. The offload action may have multiple dependences classier in “host” and “device”. The way this generic offloading action is used is very similar to what is done today by the CUDA implementation: it is used to set a specific toolchain and architecture to its dependences during the generation of jobs. This patch also proposes propagating the offloading information through the action graph so that that information can be easily retrieved at any time during the generation of commands. This allows e.g. the "clang tool” to evaluate whether CUDA should be supported for the device or host and ptas to easily retrieve the target architecture. This is an example of how the action graphs would look like (compilation of a single CUDA file with two GPU architectures) ``` 0: input, "cudatests.cu", cuda, (host-cuda) 1: preprocessor, {0}, cuda-cpp-output, (host-cuda) 2: compiler, {1}, ir, (host-cuda) 3: input, "cudatests.cu", cuda, (device-cuda, sm_35) 4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_35) 5: compiler, {4}, ir, (device-cuda, sm_35) 6: backend, {5}, assembler, (device-cuda, sm_35) 7: assembler, {6}, object, (device-cuda, sm_35) 8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {7}, object 9: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {6}, assembler 10: input, "cudatests.cu", cuda, (device-cuda, sm_37) 11: preprocessor, {10}, cuda-cpp-output, (device-cuda, sm_37) 12: compiler, {11}, ir, (device-cuda, sm_37) 13: backend, {12}, assembler, (device-cuda, sm_37) 14: assembler, {13}, object, (device-cuda, sm_37) 15: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {14}, object 16: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {13}, assembler 17: linker, {8, 9, 15, 16}, cuda-fatbin, (device-cuda) 18: offload, "host-cuda (powerpc64le-unknown-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda)" {17}, ir 19: backend, {18}, assembler 20: assembler, {19}, object 21: input, "cuda", object 22: input, "cudart", object 23: linker, {20, 21, 22}, image ``` The changes in this patch pass the existent regression tests (keeps the existent functionality) and resulting binaries execute correctly in a Power8+K40 machine. Reviewers: echristo, hfinkel, jlebar, ABataev, tra Subscribers: guansong, andreybokhanko, tcramer, mkuron, cfe-commits, arpith-jacob, carlo.bertolli, caomhin Differential Revision: https://reviews.llvm.org/D18171 llvm-svn: 275645
2016-07-16 07:13:27 +08:00
// If 'Action 2' is host, we generate jobs for the device dependences and
// override the current action with the host dependence. Otherwise, we
// generate the host dependences and override the action with the device
// dependence. The dependences can't therefore be a top-level action.
OA->doOnEachDependence(
/*IsHostDependence=*/BuildingForOffloadDevice,
[CUDA][OpenMP] Create generic offload action Summary: This patch replaces the CUDA specific action by a generic offload action. The offload action may have multiple dependences classier in “host” and “device”. The way this generic offloading action is used is very similar to what is done today by the CUDA implementation: it is used to set a specific toolchain and architecture to its dependences during the generation of jobs. This patch also proposes propagating the offloading information through the action graph so that that information can be easily retrieved at any time during the generation of commands. This allows e.g. the "clang tool” to evaluate whether CUDA should be supported for the device or host and ptas to easily retrieve the target architecture. This is an example of how the action graphs would look like (compilation of a single CUDA file with two GPU architectures) ``` 0: input, "cudatests.cu", cuda, (host-cuda) 1: preprocessor, {0}, cuda-cpp-output, (host-cuda) 2: compiler, {1}, ir, (host-cuda) 3: input, "cudatests.cu", cuda, (device-cuda, sm_35) 4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_35) 5: compiler, {4}, ir, (device-cuda, sm_35) 6: backend, {5}, assembler, (device-cuda, sm_35) 7: assembler, {6}, object, (device-cuda, sm_35) 8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {7}, object 9: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {6}, assembler 10: input, "cudatests.cu", cuda, (device-cuda, sm_37) 11: preprocessor, {10}, cuda-cpp-output, (device-cuda, sm_37) 12: compiler, {11}, ir, (device-cuda, sm_37) 13: backend, {12}, assembler, (device-cuda, sm_37) 14: assembler, {13}, object, (device-cuda, sm_37) 15: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {14}, object 16: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {13}, assembler 17: linker, {8, 9, 15, 16}, cuda-fatbin, (device-cuda) 18: offload, "host-cuda (powerpc64le-unknown-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda)" {17}, ir 19: backend, {18}, assembler 20: assembler, {19}, object 21: input, "cuda", object 22: input, "cudart", object 23: linker, {20, 21, 22}, image ``` The changes in this patch pass the existent regression tests (keeps the existent functionality) and resulting binaries execute correctly in a Power8+K40 machine. Reviewers: echristo, hfinkel, jlebar, ABataev, tra Subscribers: guansong, andreybokhanko, tcramer, mkuron, cfe-commits, arpith-jacob, carlo.bertolli, caomhin Differential Revision: https://reviews.llvm.org/D18171 llvm-svn: 275645
2016-07-16 07:13:27 +08:00
[&](Action *DepA, const ToolChain *DepTC, const char *DepBoundArch) {
OffloadDependencesInputInfo.push_back(BuildJobsForAction(
C, DepA, DepTC, DepBoundArch, /*AtTopLevel=*/false,
/*MultipleArchs*/ !!DepBoundArch, LinkingOutput, CachedResults,
DepA->getOffloadingDeviceKind()));
[CUDA][OpenMP] Create generic offload action Summary: This patch replaces the CUDA specific action by a generic offload action. The offload action may have multiple dependences classier in “host” and “device”. The way this generic offloading action is used is very similar to what is done today by the CUDA implementation: it is used to set a specific toolchain and architecture to its dependences during the generation of jobs. This patch also proposes propagating the offloading information through the action graph so that that information can be easily retrieved at any time during the generation of commands. This allows e.g. the "clang tool” to evaluate whether CUDA should be supported for the device or host and ptas to easily retrieve the target architecture. This is an example of how the action graphs would look like (compilation of a single CUDA file with two GPU architectures) ``` 0: input, "cudatests.cu", cuda, (host-cuda) 1: preprocessor, {0}, cuda-cpp-output, (host-cuda) 2: compiler, {1}, ir, (host-cuda) 3: input, "cudatests.cu", cuda, (device-cuda, sm_35) 4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_35) 5: compiler, {4}, ir, (device-cuda, sm_35) 6: backend, {5}, assembler, (device-cuda, sm_35) 7: assembler, {6}, object, (device-cuda, sm_35) 8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {7}, object 9: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {6}, assembler 10: input, "cudatests.cu", cuda, (device-cuda, sm_37) 11: preprocessor, {10}, cuda-cpp-output, (device-cuda, sm_37) 12: compiler, {11}, ir, (device-cuda, sm_37) 13: backend, {12}, assembler, (device-cuda, sm_37) 14: assembler, {13}, object, (device-cuda, sm_37) 15: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {14}, object 16: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {13}, assembler 17: linker, {8, 9, 15, 16}, cuda-fatbin, (device-cuda) 18: offload, "host-cuda (powerpc64le-unknown-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda)" {17}, ir 19: backend, {18}, assembler 20: assembler, {19}, object 21: input, "cuda", object 22: input, "cudart", object 23: linker, {20, 21, 22}, image ``` The changes in this patch pass the existent regression tests (keeps the existent functionality) and resulting binaries execute correctly in a Power8+K40 machine. Reviewers: echristo, hfinkel, jlebar, ABataev, tra Subscribers: guansong, andreybokhanko, tcramer, mkuron, cfe-commits, arpith-jacob, carlo.bertolli, caomhin Differential Revision: https://reviews.llvm.org/D18171 llvm-svn: 275645
2016-07-16 07:13:27 +08:00
});
A = BuildingForOffloadDevice
[CUDA][OpenMP] Create generic offload action Summary: This patch replaces the CUDA specific action by a generic offload action. The offload action may have multiple dependences classier in “host” and “device”. The way this generic offloading action is used is very similar to what is done today by the CUDA implementation: it is used to set a specific toolchain and architecture to its dependences during the generation of jobs. This patch also proposes propagating the offloading information through the action graph so that that information can be easily retrieved at any time during the generation of commands. This allows e.g. the "clang tool” to evaluate whether CUDA should be supported for the device or host and ptas to easily retrieve the target architecture. This is an example of how the action graphs would look like (compilation of a single CUDA file with two GPU architectures) ``` 0: input, "cudatests.cu", cuda, (host-cuda) 1: preprocessor, {0}, cuda-cpp-output, (host-cuda) 2: compiler, {1}, ir, (host-cuda) 3: input, "cudatests.cu", cuda, (device-cuda, sm_35) 4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_35) 5: compiler, {4}, ir, (device-cuda, sm_35) 6: backend, {5}, assembler, (device-cuda, sm_35) 7: assembler, {6}, object, (device-cuda, sm_35) 8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {7}, object 9: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {6}, assembler 10: input, "cudatests.cu", cuda, (device-cuda, sm_37) 11: preprocessor, {10}, cuda-cpp-output, (device-cuda, sm_37) 12: compiler, {11}, ir, (device-cuda, sm_37) 13: backend, {12}, assembler, (device-cuda, sm_37) 14: assembler, {13}, object, (device-cuda, sm_37) 15: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {14}, object 16: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {13}, assembler 17: linker, {8, 9, 15, 16}, cuda-fatbin, (device-cuda) 18: offload, "host-cuda (powerpc64le-unknown-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda)" {17}, ir 19: backend, {18}, assembler 20: assembler, {19}, object 21: input, "cuda", object 22: input, "cudart", object 23: linker, {20, 21, 22}, image ``` The changes in this patch pass the existent regression tests (keeps the existent functionality) and resulting binaries execute correctly in a Power8+K40 machine. Reviewers: echristo, hfinkel, jlebar, ABataev, tra Subscribers: guansong, andreybokhanko, tcramer, mkuron, cfe-commits, arpith-jacob, carlo.bertolli, caomhin Differential Revision: https://reviews.llvm.org/D18171 llvm-svn: 275645
2016-07-16 07:13:27 +08:00
? OA->getSingleDeviceDependence(/*DoNotConsiderHostActions=*/true)
: OA->getHostDependence();
}
if (const InputAction *IA = dyn_cast<InputAction>(A)) {
// FIXME: It would be nice to not claim this here; maybe the old scheme of
// just using Args was better?
const Arg &Input = IA->getInputArg();
Input.claim();
if (Input.getOption().matches(options::OPT_INPUT)) {
const char *Name = Input.getValue();
return InputInfo(A, Name, /* BaseInput = */ Name);
}
return InputInfo(A, &Input, /* BaseInput = */ "");
}
if (const BindArchAction *BAA = dyn_cast<BindArchAction>(A)) {
const ToolChain *TC;
StringRef ArchName = BAA->getArchName();
if (!ArchName.empty())
TC = &getToolChain(C.getArgs(),
computeTargetTriple(*this, DefaultTargetTriple,
C.getArgs(), ArchName));
else
TC = &C.getDefaultToolChain();
return BuildJobsForAction(C, *BAA->input_begin(), TC, ArchName, AtTopLevel,
[CUDA][OpenMP] Create generic offload action Summary: This patch replaces the CUDA specific action by a generic offload action. The offload action may have multiple dependences classier in “host” and “device”. The way this generic offloading action is used is very similar to what is done today by the CUDA implementation: it is used to set a specific toolchain and architecture to its dependences during the generation of jobs. This patch also proposes propagating the offloading information through the action graph so that that information can be easily retrieved at any time during the generation of commands. This allows e.g. the "clang tool” to evaluate whether CUDA should be supported for the device or host and ptas to easily retrieve the target architecture. This is an example of how the action graphs would look like (compilation of a single CUDA file with two GPU architectures) ``` 0: input, "cudatests.cu", cuda, (host-cuda) 1: preprocessor, {0}, cuda-cpp-output, (host-cuda) 2: compiler, {1}, ir, (host-cuda) 3: input, "cudatests.cu", cuda, (device-cuda, sm_35) 4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_35) 5: compiler, {4}, ir, (device-cuda, sm_35) 6: backend, {5}, assembler, (device-cuda, sm_35) 7: assembler, {6}, object, (device-cuda, sm_35) 8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {7}, object 9: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {6}, assembler 10: input, "cudatests.cu", cuda, (device-cuda, sm_37) 11: preprocessor, {10}, cuda-cpp-output, (device-cuda, sm_37) 12: compiler, {11}, ir, (device-cuda, sm_37) 13: backend, {12}, assembler, (device-cuda, sm_37) 14: assembler, {13}, object, (device-cuda, sm_37) 15: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {14}, object 16: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {13}, assembler 17: linker, {8, 9, 15, 16}, cuda-fatbin, (device-cuda) 18: offload, "host-cuda (powerpc64le-unknown-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda)" {17}, ir 19: backend, {18}, assembler 20: assembler, {19}, object 21: input, "cuda", object 22: input, "cudart", object 23: linker, {20, 21, 22}, image ``` The changes in this patch pass the existent regression tests (keeps the existent functionality) and resulting binaries execute correctly in a Power8+K40 machine. Reviewers: echristo, hfinkel, jlebar, ABataev, tra Subscribers: guansong, andreybokhanko, tcramer, mkuron, cfe-commits, arpith-jacob, carlo.bertolli, caomhin Differential Revision: https://reviews.llvm.org/D18171 llvm-svn: 275645
2016-07-16 07:13:27 +08:00
MultipleArchs, LinkingOutput, CachedResults,
TargetDeviceOffloadKind);
}
const ActionList *Inputs = &A->getInputs();
const JobAction *JA = cast<JobAction>(A);
[CUDA][OpenMP] Create generic offload action Summary: This patch replaces the CUDA specific action by a generic offload action. The offload action may have multiple dependences classier in “host” and “device”. The way this generic offloading action is used is very similar to what is done today by the CUDA implementation: it is used to set a specific toolchain and architecture to its dependences during the generation of jobs. This patch also proposes propagating the offloading information through the action graph so that that information can be easily retrieved at any time during the generation of commands. This allows e.g. the "clang tool” to evaluate whether CUDA should be supported for the device or host and ptas to easily retrieve the target architecture. This is an example of how the action graphs would look like (compilation of a single CUDA file with two GPU architectures) ``` 0: input, "cudatests.cu", cuda, (host-cuda) 1: preprocessor, {0}, cuda-cpp-output, (host-cuda) 2: compiler, {1}, ir, (host-cuda) 3: input, "cudatests.cu", cuda, (device-cuda, sm_35) 4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_35) 5: compiler, {4}, ir, (device-cuda, sm_35) 6: backend, {5}, assembler, (device-cuda, sm_35) 7: assembler, {6}, object, (device-cuda, sm_35) 8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {7}, object 9: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {6}, assembler 10: input, "cudatests.cu", cuda, (device-cuda, sm_37) 11: preprocessor, {10}, cuda-cpp-output, (device-cuda, sm_37) 12: compiler, {11}, ir, (device-cuda, sm_37) 13: backend, {12}, assembler, (device-cuda, sm_37) 14: assembler, {13}, object, (device-cuda, sm_37) 15: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {14}, object 16: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {13}, assembler 17: linker, {8, 9, 15, 16}, cuda-fatbin, (device-cuda) 18: offload, "host-cuda (powerpc64le-unknown-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda)" {17}, ir 19: backend, {18}, assembler 20: assembler, {19}, object 21: input, "cuda", object 22: input, "cudart", object 23: linker, {20, 21, 22}, image ``` The changes in this patch pass the existent regression tests (keeps the existent functionality) and resulting binaries execute correctly in a Power8+K40 machine. Reviewers: echristo, hfinkel, jlebar, ABataev, tra Subscribers: guansong, andreybokhanko, tcramer, mkuron, cfe-commits, arpith-jacob, carlo.bertolli, caomhin Differential Revision: https://reviews.llvm.org/D18171 llvm-svn: 275645
2016-07-16 07:13:27 +08:00
ActionList CollapsedOffloadActions;
ToolSelector TS(JA, *TC, C, isSaveTempsEnabled(),
embedBitcodeInObject() && !isUsingLTO());
const Tool *T = TS.getTool(Inputs, CollapsedOffloadActions);
if (!T)
return InputInfo();
[CUDA][OpenMP] Create generic offload action Summary: This patch replaces the CUDA specific action by a generic offload action. The offload action may have multiple dependences classier in “host” and “device”. The way this generic offloading action is used is very similar to what is done today by the CUDA implementation: it is used to set a specific toolchain and architecture to its dependences during the generation of jobs. This patch also proposes propagating the offloading information through the action graph so that that information can be easily retrieved at any time during the generation of commands. This allows e.g. the "clang tool” to evaluate whether CUDA should be supported for the device or host and ptas to easily retrieve the target architecture. This is an example of how the action graphs would look like (compilation of a single CUDA file with two GPU architectures) ``` 0: input, "cudatests.cu", cuda, (host-cuda) 1: preprocessor, {0}, cuda-cpp-output, (host-cuda) 2: compiler, {1}, ir, (host-cuda) 3: input, "cudatests.cu", cuda, (device-cuda, sm_35) 4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_35) 5: compiler, {4}, ir, (device-cuda, sm_35) 6: backend, {5}, assembler, (device-cuda, sm_35) 7: assembler, {6}, object, (device-cuda, sm_35) 8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {7}, object 9: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {6}, assembler 10: input, "cudatests.cu", cuda, (device-cuda, sm_37) 11: preprocessor, {10}, cuda-cpp-output, (device-cuda, sm_37) 12: compiler, {11}, ir, (device-cuda, sm_37) 13: backend, {12}, assembler, (device-cuda, sm_37) 14: assembler, {13}, object, (device-cuda, sm_37) 15: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {14}, object 16: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {13}, assembler 17: linker, {8, 9, 15, 16}, cuda-fatbin, (device-cuda) 18: offload, "host-cuda (powerpc64le-unknown-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda)" {17}, ir 19: backend, {18}, assembler 20: assembler, {19}, object 21: input, "cuda", object 22: input, "cudart", object 23: linker, {20, 21, 22}, image ``` The changes in this patch pass the existent regression tests (keeps the existent functionality) and resulting binaries execute correctly in a Power8+K40 machine. Reviewers: echristo, hfinkel, jlebar, ABataev, tra Subscribers: guansong, andreybokhanko, tcramer, mkuron, cfe-commits, arpith-jacob, carlo.bertolli, caomhin Differential Revision: https://reviews.llvm.org/D18171 llvm-svn: 275645
2016-07-16 07:13:27 +08:00
// If we've collapsed action list that contained OffloadAction we
// need to build jobs for host/device-side inputs it may have held.
for (const auto *OA : CollapsedOffloadActions)
cast<OffloadAction>(OA)->doOnEachDependence(
/*IsHostDependence=*/BuildingForOffloadDevice,
[CUDA][OpenMP] Create generic offload action Summary: This patch replaces the CUDA specific action by a generic offload action. The offload action may have multiple dependences classier in “host” and “device”. The way this generic offloading action is used is very similar to what is done today by the CUDA implementation: it is used to set a specific toolchain and architecture to its dependences during the generation of jobs. This patch also proposes propagating the offloading information through the action graph so that that information can be easily retrieved at any time during the generation of commands. This allows e.g. the "clang tool” to evaluate whether CUDA should be supported for the device or host and ptas to easily retrieve the target architecture. This is an example of how the action graphs would look like (compilation of a single CUDA file with two GPU architectures) ``` 0: input, "cudatests.cu", cuda, (host-cuda) 1: preprocessor, {0}, cuda-cpp-output, (host-cuda) 2: compiler, {1}, ir, (host-cuda) 3: input, "cudatests.cu", cuda, (device-cuda, sm_35) 4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_35) 5: compiler, {4}, ir, (device-cuda, sm_35) 6: backend, {5}, assembler, (device-cuda, sm_35) 7: assembler, {6}, object, (device-cuda, sm_35) 8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {7}, object 9: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {6}, assembler 10: input, "cudatests.cu", cuda, (device-cuda, sm_37) 11: preprocessor, {10}, cuda-cpp-output, (device-cuda, sm_37) 12: compiler, {11}, ir, (device-cuda, sm_37) 13: backend, {12}, assembler, (device-cuda, sm_37) 14: assembler, {13}, object, (device-cuda, sm_37) 15: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {14}, object 16: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {13}, assembler 17: linker, {8, 9, 15, 16}, cuda-fatbin, (device-cuda) 18: offload, "host-cuda (powerpc64le-unknown-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda)" {17}, ir 19: backend, {18}, assembler 20: assembler, {19}, object 21: input, "cuda", object 22: input, "cudart", object 23: linker, {20, 21, 22}, image ``` The changes in this patch pass the existent regression tests (keeps the existent functionality) and resulting binaries execute correctly in a Power8+K40 machine. Reviewers: echristo, hfinkel, jlebar, ABataev, tra Subscribers: guansong, andreybokhanko, tcramer, mkuron, cfe-commits, arpith-jacob, carlo.bertolli, caomhin Differential Revision: https://reviews.llvm.org/D18171 llvm-svn: 275645
2016-07-16 07:13:27 +08:00
[&](Action *DepA, const ToolChain *DepTC, const char *DepBoundArch) {
OffloadDependencesInputInfo.push_back(BuildJobsForAction(
C, DepA, DepTC, DepBoundArch, /* AtTopLevel */ false,
[CUDA][OpenMP] Create generic offload action Summary: This patch replaces the CUDA specific action by a generic offload action. The offload action may have multiple dependences classier in “host” and “device”. The way this generic offloading action is used is very similar to what is done today by the CUDA implementation: it is used to set a specific toolchain and architecture to its dependences during the generation of jobs. This patch also proposes propagating the offloading information through the action graph so that that information can be easily retrieved at any time during the generation of commands. This allows e.g. the "clang tool” to evaluate whether CUDA should be supported for the device or host and ptas to easily retrieve the target architecture. This is an example of how the action graphs would look like (compilation of a single CUDA file with two GPU architectures) ``` 0: input, "cudatests.cu", cuda, (host-cuda) 1: preprocessor, {0}, cuda-cpp-output, (host-cuda) 2: compiler, {1}, ir, (host-cuda) 3: input, "cudatests.cu", cuda, (device-cuda, sm_35) 4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_35) 5: compiler, {4}, ir, (device-cuda, sm_35) 6: backend, {5}, assembler, (device-cuda, sm_35) 7: assembler, {6}, object, (device-cuda, sm_35) 8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {7}, object 9: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {6}, assembler 10: input, "cudatests.cu", cuda, (device-cuda, sm_37) 11: preprocessor, {10}, cuda-cpp-output, (device-cuda, sm_37) 12: compiler, {11}, ir, (device-cuda, sm_37) 13: backend, {12}, assembler, (device-cuda, sm_37) 14: assembler, {13}, object, (device-cuda, sm_37) 15: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {14}, object 16: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {13}, assembler 17: linker, {8, 9, 15, 16}, cuda-fatbin, (device-cuda) 18: offload, "host-cuda (powerpc64le-unknown-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda)" {17}, ir 19: backend, {18}, assembler 20: assembler, {19}, object 21: input, "cuda", object 22: input, "cudart", object 23: linker, {20, 21, 22}, image ``` The changes in this patch pass the existent regression tests (keeps the existent functionality) and resulting binaries execute correctly in a Power8+K40 machine. Reviewers: echristo, hfinkel, jlebar, ABataev, tra Subscribers: guansong, andreybokhanko, tcramer, mkuron, cfe-commits, arpith-jacob, carlo.bertolli, caomhin Differential Revision: https://reviews.llvm.org/D18171 llvm-svn: 275645
2016-07-16 07:13:27 +08:00
/*MultipleArchs=*/!!DepBoundArch, LinkingOutput, CachedResults,
DepA->getOffloadingDeviceKind()));
[CUDA][OpenMP] Create generic offload action Summary: This patch replaces the CUDA specific action by a generic offload action. The offload action may have multiple dependences classier in “host” and “device”. The way this generic offloading action is used is very similar to what is done today by the CUDA implementation: it is used to set a specific toolchain and architecture to its dependences during the generation of jobs. This patch also proposes propagating the offloading information through the action graph so that that information can be easily retrieved at any time during the generation of commands. This allows e.g. the "clang tool” to evaluate whether CUDA should be supported for the device or host and ptas to easily retrieve the target architecture. This is an example of how the action graphs would look like (compilation of a single CUDA file with two GPU architectures) ``` 0: input, "cudatests.cu", cuda, (host-cuda) 1: preprocessor, {0}, cuda-cpp-output, (host-cuda) 2: compiler, {1}, ir, (host-cuda) 3: input, "cudatests.cu", cuda, (device-cuda, sm_35) 4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_35) 5: compiler, {4}, ir, (device-cuda, sm_35) 6: backend, {5}, assembler, (device-cuda, sm_35) 7: assembler, {6}, object, (device-cuda, sm_35) 8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {7}, object 9: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {6}, assembler 10: input, "cudatests.cu", cuda, (device-cuda, sm_37) 11: preprocessor, {10}, cuda-cpp-output, (device-cuda, sm_37) 12: compiler, {11}, ir, (device-cuda, sm_37) 13: backend, {12}, assembler, (device-cuda, sm_37) 14: assembler, {13}, object, (device-cuda, sm_37) 15: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {14}, object 16: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {13}, assembler 17: linker, {8, 9, 15, 16}, cuda-fatbin, (device-cuda) 18: offload, "host-cuda (powerpc64le-unknown-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda)" {17}, ir 19: backend, {18}, assembler 20: assembler, {19}, object 21: input, "cuda", object 22: input, "cudart", object 23: linker, {20, 21, 22}, image ``` The changes in this patch pass the existent regression tests (keeps the existent functionality) and resulting binaries execute correctly in a Power8+K40 machine. Reviewers: echristo, hfinkel, jlebar, ABataev, tra Subscribers: guansong, andreybokhanko, tcramer, mkuron, cfe-commits, arpith-jacob, carlo.bertolli, caomhin Differential Revision: https://reviews.llvm.org/D18171 llvm-svn: 275645
2016-07-16 07:13:27 +08:00
});
// Only use pipes when there is exactly one input.
InputInfoList InputInfos;
for (const Action *Input : *Inputs) {
// Treat dsymutil and verify sub-jobs as being at the top-level too, they
// shouldn't get temporary output names.
// FIXME: Clean this up.
bool SubJobAtTopLevel =
AtTopLevel && (isa<DsymutilJobAction>(A) || isa<VerifyJobAction>(A));
[CUDA][OpenMP] Create generic offload action Summary: This patch replaces the CUDA specific action by a generic offload action. The offload action may have multiple dependences classier in “host” and “device”. The way this generic offloading action is used is very similar to what is done today by the CUDA implementation: it is used to set a specific toolchain and architecture to its dependences during the generation of jobs. This patch also proposes propagating the offloading information through the action graph so that that information can be easily retrieved at any time during the generation of commands. This allows e.g. the "clang tool” to evaluate whether CUDA should be supported for the device or host and ptas to easily retrieve the target architecture. This is an example of how the action graphs would look like (compilation of a single CUDA file with two GPU architectures) ``` 0: input, "cudatests.cu", cuda, (host-cuda) 1: preprocessor, {0}, cuda-cpp-output, (host-cuda) 2: compiler, {1}, ir, (host-cuda) 3: input, "cudatests.cu", cuda, (device-cuda, sm_35) 4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_35) 5: compiler, {4}, ir, (device-cuda, sm_35) 6: backend, {5}, assembler, (device-cuda, sm_35) 7: assembler, {6}, object, (device-cuda, sm_35) 8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {7}, object 9: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {6}, assembler 10: input, "cudatests.cu", cuda, (device-cuda, sm_37) 11: preprocessor, {10}, cuda-cpp-output, (device-cuda, sm_37) 12: compiler, {11}, ir, (device-cuda, sm_37) 13: backend, {12}, assembler, (device-cuda, sm_37) 14: assembler, {13}, object, (device-cuda, sm_37) 15: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {14}, object 16: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {13}, assembler 17: linker, {8, 9, 15, 16}, cuda-fatbin, (device-cuda) 18: offload, "host-cuda (powerpc64le-unknown-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda)" {17}, ir 19: backend, {18}, assembler 20: assembler, {19}, object 21: input, "cuda", object 22: input, "cudart", object 23: linker, {20, 21, 22}, image ``` The changes in this patch pass the existent regression tests (keeps the existent functionality) and resulting binaries execute correctly in a Power8+K40 machine. Reviewers: echristo, hfinkel, jlebar, ABataev, tra Subscribers: guansong, andreybokhanko, tcramer, mkuron, cfe-commits, arpith-jacob, carlo.bertolli, caomhin Differential Revision: https://reviews.llvm.org/D18171 llvm-svn: 275645
2016-07-16 07:13:27 +08:00
InputInfos.push_back(BuildJobsForAction(
C, Input, TC, BoundArch, SubJobAtTopLevel, MultipleArchs, LinkingOutput,
CachedResults, A->getOffloadingDeviceKind()));
}
// Always use the first input as the base input.
const char *BaseInput = InputInfos[0].getBaseInput();
// ... except dsymutil actions, which use their actual input as the base
// input.
if (JA->getType() == types::TY_dSYM)
BaseInput = InputInfos[0].getFilename();
[CUDA][OpenMP] Create generic offload action Summary: This patch replaces the CUDA specific action by a generic offload action. The offload action may have multiple dependences classier in “host” and “device”. The way this generic offloading action is used is very similar to what is done today by the CUDA implementation: it is used to set a specific toolchain and architecture to its dependences during the generation of jobs. This patch also proposes propagating the offloading information through the action graph so that that information can be easily retrieved at any time during the generation of commands. This allows e.g. the "clang tool” to evaluate whether CUDA should be supported for the device or host and ptas to easily retrieve the target architecture. This is an example of how the action graphs would look like (compilation of a single CUDA file with two GPU architectures) ``` 0: input, "cudatests.cu", cuda, (host-cuda) 1: preprocessor, {0}, cuda-cpp-output, (host-cuda) 2: compiler, {1}, ir, (host-cuda) 3: input, "cudatests.cu", cuda, (device-cuda, sm_35) 4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_35) 5: compiler, {4}, ir, (device-cuda, sm_35) 6: backend, {5}, assembler, (device-cuda, sm_35) 7: assembler, {6}, object, (device-cuda, sm_35) 8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {7}, object 9: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {6}, assembler 10: input, "cudatests.cu", cuda, (device-cuda, sm_37) 11: preprocessor, {10}, cuda-cpp-output, (device-cuda, sm_37) 12: compiler, {11}, ir, (device-cuda, sm_37) 13: backend, {12}, assembler, (device-cuda, sm_37) 14: assembler, {13}, object, (device-cuda, sm_37) 15: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {14}, object 16: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {13}, assembler 17: linker, {8, 9, 15, 16}, cuda-fatbin, (device-cuda) 18: offload, "host-cuda (powerpc64le-unknown-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda)" {17}, ir 19: backend, {18}, assembler 20: assembler, {19}, object 21: input, "cuda", object 22: input, "cudart", object 23: linker, {20, 21, 22}, image ``` The changes in this patch pass the existent regression tests (keeps the existent functionality) and resulting binaries execute correctly in a Power8+K40 machine. Reviewers: echristo, hfinkel, jlebar, ABataev, tra Subscribers: guansong, andreybokhanko, tcramer, mkuron, cfe-commits, arpith-jacob, carlo.bertolli, caomhin Differential Revision: https://reviews.llvm.org/D18171 llvm-svn: 275645
2016-07-16 07:13:27 +08:00
// Append outputs of offload device jobs to the input list
if (!OffloadDependencesInputInfo.empty())
InputInfos.append(OffloadDependencesInputInfo.begin(),
OffloadDependencesInputInfo.end());
// Set the effective triple of the toolchain for the duration of this job.
llvm::Triple EffectiveTriple;
const ToolChain &ToolTC = T->getToolChain();
const ArgList &Args =
C.getArgsForToolChain(TC, BoundArch, A->getOffloadingDeviceKind());
if (InputInfos.size() != 1) {
EffectiveTriple = llvm::Triple(ToolTC.ComputeEffectiveClangTriple(Args));
} else {
// Pass along the input type if it can be unambiguously determined.
EffectiveTriple = llvm::Triple(
ToolTC.ComputeEffectiveClangTriple(Args, InputInfos[0].getType()));
}
RegisterEffectiveTriple TripleRAII(ToolTC, EffectiveTriple);
2010-08-02 10:38:15 +08:00
// Determine the place to write output to, if any.
InputInfo Result;
InputInfoList UnbundlingResults;
if (auto *UA = dyn_cast<OffloadUnbundlingJobAction>(JA)) {
// If we have an unbundling job, we need to create results for all the
// outputs. We also update the results cache so that other actions using
// this unbundling action can get the right results.
for (auto &UI : UA->getDependentActionsInfo()) {
assert(UI.DependentOffloadKind != Action::OFK_None &&
"Unbundling with no offloading??");
// Unbundling actions are never at the top level. When we generate the
// offloading prefix, we also do that for the host file because the
// unbundling action does not change the type of the output which can
// cause a overwrite.
std::string OffloadingPrefix = Action::GetOffloadingFileNamePrefix(
UI.DependentOffloadKind,
UI.DependentToolChain->getTriple().normalize(),
/*CreatePrefixForHost=*/true);
auto CurI = InputInfo(
UA, GetNamedOutputPath(C, *UA, BaseInput, UI.DependentBoundArch,
/*AtTopLevel=*/false, MultipleArchs,
OffloadingPrefix),
BaseInput);
// Save the unbundling result.
UnbundlingResults.push_back(CurI);
// Get the unique string identifier for this dependence and cache the
// result.
CachedResults[{A, GetTriplePlusArchString(
UI.DependentToolChain, UI.DependentBoundArch,
UI.DependentOffloadKind)}] = CurI;
}
// Now that we have all the results generated, select the one that should be
// returned for the current depending action.
std::pair<const Action *, std::string> ActionTC = {
A, GetTriplePlusArchString(TC, BoundArch, TargetDeviceOffloadKind)};
assert(CachedResults.find(ActionTC) != CachedResults.end() &&
"Result does not exist??");
Result = CachedResults[ActionTC];
} else if (JA->getType() == types::TY_Nothing)
Result = InputInfo(A, BaseInput);
else {
// We only have to generate a prefix for the host if this is not a top-level
// action.
std::string OffloadingPrefix = Action::GetOffloadingFileNamePrefix(
A->getOffloadingDeviceKind(), TC->getTriple().normalize(),
/*CreatePrefixForHost=*/!!A->getOffloadingHostActiveKinds() &&
!AtTopLevel);
Result = InputInfo(A, GetNamedOutputPath(C, *JA, BaseInput, BoundArch,
[CUDA][OpenMP] Create generic offload action Summary: This patch replaces the CUDA specific action by a generic offload action. The offload action may have multiple dependences classier in “host” and “device”. The way this generic offloading action is used is very similar to what is done today by the CUDA implementation: it is used to set a specific toolchain and architecture to its dependences during the generation of jobs. This patch also proposes propagating the offloading information through the action graph so that that information can be easily retrieved at any time during the generation of commands. This allows e.g. the "clang tool” to evaluate whether CUDA should be supported for the device or host and ptas to easily retrieve the target architecture. This is an example of how the action graphs would look like (compilation of a single CUDA file with two GPU architectures) ``` 0: input, "cudatests.cu", cuda, (host-cuda) 1: preprocessor, {0}, cuda-cpp-output, (host-cuda) 2: compiler, {1}, ir, (host-cuda) 3: input, "cudatests.cu", cuda, (device-cuda, sm_35) 4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_35) 5: compiler, {4}, ir, (device-cuda, sm_35) 6: backend, {5}, assembler, (device-cuda, sm_35) 7: assembler, {6}, object, (device-cuda, sm_35) 8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {7}, object 9: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {6}, assembler 10: input, "cudatests.cu", cuda, (device-cuda, sm_37) 11: preprocessor, {10}, cuda-cpp-output, (device-cuda, sm_37) 12: compiler, {11}, ir, (device-cuda, sm_37) 13: backend, {12}, assembler, (device-cuda, sm_37) 14: assembler, {13}, object, (device-cuda, sm_37) 15: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {14}, object 16: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {13}, assembler 17: linker, {8, 9, 15, 16}, cuda-fatbin, (device-cuda) 18: offload, "host-cuda (powerpc64le-unknown-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda)" {17}, ir 19: backend, {18}, assembler 20: assembler, {19}, object 21: input, "cuda", object 22: input, "cudart", object 23: linker, {20, 21, 22}, image ``` The changes in this patch pass the existent regression tests (keeps the existent functionality) and resulting binaries execute correctly in a Power8+K40 machine. Reviewers: echristo, hfinkel, jlebar, ABataev, tra Subscribers: guansong, andreybokhanko, tcramer, mkuron, cfe-commits, arpith-jacob, carlo.bertolli, caomhin Differential Revision: https://reviews.llvm.org/D18171 llvm-svn: 275645
2016-07-16 07:13:27 +08:00
AtTopLevel, MultipleArchs,
OffloadingPrefix),
BaseInput);
}
if (CCCPrintBindings && !CCGenDiagnostics) {
llvm::errs() << "# \"" << T->getToolChain().getTripleString() << '"'
<< " - \"" << T->getName() << "\", inputs: [";
for (unsigned i = 0, e = InputInfos.size(); i != e; ++i) {
llvm::errs() << InputInfos[i].getAsString();
if (i + 1 != e)
llvm::errs() << ", ";
}
if (UnbundlingResults.empty())
llvm::errs() << "], output: " << Result.getAsString() << "\n";
else {
llvm::errs() << "], outputs: [";
for (unsigned i = 0, e = UnbundlingResults.size(); i != e; ++i) {
llvm::errs() << UnbundlingResults[i].getAsString();
if (i + 1 != e)
llvm::errs() << ", ";
}
llvm::errs() << "] \n";
}
} else {
if (UnbundlingResults.empty())
T->ConstructJob(
C, *JA, Result, InputInfos,
C.getArgsForToolChain(TC, BoundArch, JA->getOffloadingDeviceKind()),
LinkingOutput);
else
T->ConstructJobMultipleOutputs(
C, *JA, UnbundlingResults, InputInfos,
C.getArgsForToolChain(TC, BoundArch, JA->getOffloadingDeviceKind()),
LinkingOutput);
}
return Result;
}
const char *Driver::getDefaultImageName() const {
llvm::Triple Target(llvm::Triple::normalize(DefaultTargetTriple));
return Target.isOSWindows() ? "a.exe" : "a.out";
}
/// \brief Create output filename based on ArgValue, which could either be a
/// full filename, filename without extension, or a directory. If ArgValue
/// does not provide a filename, then use BaseName, and use the extension
/// suitable for FileType.
static const char *MakeCLOutputFilename(const ArgList &Args, StringRef ArgValue,
StringRef BaseName,
types::ID FileType) {
SmallString<128> Filename = ArgValue;
if (ArgValue.empty()) {
// If the argument is empty, output to BaseName in the current dir.
Filename = BaseName;
} else if (llvm::sys::path::is_separator(Filename.back())) {
// If the argument is a directory, output to BaseName in that dir.
llvm::sys::path::append(Filename, BaseName);
}
if (!llvm::sys::path::has_extension(ArgValue)) {
// If the argument didn't provide an extension, then set it.
const char *Extension = types::getTypeTempSuffix(FileType, true);
if (FileType == types::TY_Image &&
Args.hasArg(options::OPT__SLASH_LD, options::OPT__SLASH_LDd)) {
// The output file is a dll.
Extension = "dll";
}
llvm::sys::path::replace_extension(Filename, Extension);
}
return Args.MakeArgString(Filename.c_str());
}
const char *Driver::GetNamedOutputPath(Compilation &C, const JobAction &JA,
const char *BaseInput,
StringRef BoundArch, bool AtTopLevel,
[CUDA][OpenMP] Create generic offload action Summary: This patch replaces the CUDA specific action by a generic offload action. The offload action may have multiple dependences classier in “host” and “device”. The way this generic offloading action is used is very similar to what is done today by the CUDA implementation: it is used to set a specific toolchain and architecture to its dependences during the generation of jobs. This patch also proposes propagating the offloading information through the action graph so that that information can be easily retrieved at any time during the generation of commands. This allows e.g. the "clang tool” to evaluate whether CUDA should be supported for the device or host and ptas to easily retrieve the target architecture. This is an example of how the action graphs would look like (compilation of a single CUDA file with two GPU architectures) ``` 0: input, "cudatests.cu", cuda, (host-cuda) 1: preprocessor, {0}, cuda-cpp-output, (host-cuda) 2: compiler, {1}, ir, (host-cuda) 3: input, "cudatests.cu", cuda, (device-cuda, sm_35) 4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_35) 5: compiler, {4}, ir, (device-cuda, sm_35) 6: backend, {5}, assembler, (device-cuda, sm_35) 7: assembler, {6}, object, (device-cuda, sm_35) 8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {7}, object 9: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {6}, assembler 10: input, "cudatests.cu", cuda, (device-cuda, sm_37) 11: preprocessor, {10}, cuda-cpp-output, (device-cuda, sm_37) 12: compiler, {11}, ir, (device-cuda, sm_37) 13: backend, {12}, assembler, (device-cuda, sm_37) 14: assembler, {13}, object, (device-cuda, sm_37) 15: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {14}, object 16: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {13}, assembler 17: linker, {8, 9, 15, 16}, cuda-fatbin, (device-cuda) 18: offload, "host-cuda (powerpc64le-unknown-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda)" {17}, ir 19: backend, {18}, assembler 20: assembler, {19}, object 21: input, "cuda", object 22: input, "cudart", object 23: linker, {20, 21, 22}, image ``` The changes in this patch pass the existent regression tests (keeps the existent functionality) and resulting binaries execute correctly in a Power8+K40 machine. Reviewers: echristo, hfinkel, jlebar, ABataev, tra Subscribers: guansong, andreybokhanko, tcramer, mkuron, cfe-commits, arpith-jacob, carlo.bertolli, caomhin Differential Revision: https://reviews.llvm.org/D18171 llvm-svn: 275645
2016-07-16 07:13:27 +08:00
bool MultipleArchs,
StringRef OffloadingPrefix) const {
llvm::PrettyStackTraceString CrashInfo("Computing output path");
// Output to a user requested destination?
if (AtTopLevel && !isa<DsymutilJobAction>(JA) && !isa<VerifyJobAction>(JA)) {
if (Arg *FinalOutput = C.getArgs().getLastArg(options::OPT_o))
return C.addResultFile(FinalOutput->getValue(), &JA);
}
// For /P, preprocess to file named after BaseInput.
if (C.getArgs().hasArg(options::OPT__SLASH_P)) {
assert(AtTopLevel && isa<PreprocessJobAction>(JA));
StringRef BaseName = llvm::sys::path::filename(BaseInput);
StringRef NameArg;
if (Arg *A = C.getArgs().getLastArg(options::OPT__SLASH_Fi))
NameArg = A->getValue();
return C.addResultFile(
MakeCLOutputFilename(C.getArgs(), NameArg, BaseName, types::TY_PP_C),
&JA);
}
// Default to writing to stdout?
if (AtTopLevel && !CCGenDiagnostics &&
(isa<PreprocessJobAction>(JA) || JA.getType() == types::TY_ModuleFile))
return "-";
// Is this the assembly listing for /FA?
if (JA.getType() == types::TY_PP_Asm &&
(C.getArgs().hasArg(options::OPT__SLASH_FA) ||
C.getArgs().hasArg(options::OPT__SLASH_Fa))) {
// Use /Fa and the input filename to determine the asm file name.
StringRef BaseName = llvm::sys::path::filename(BaseInput);
StringRef FaValue = C.getArgs().getLastArgValue(options::OPT__SLASH_Fa);
return C.addResultFile(
MakeCLOutputFilename(C.getArgs(), FaValue, BaseName, JA.getType()),
&JA);
}
// Output to a temporary file?
if ((!AtTopLevel && !isSaveTempsEnabled() &&
!C.getArgs().hasArg(options::OPT__SLASH_Fo)) ||
CCGenDiagnostics) {
StringRef Name = llvm::sys::path::filename(BaseInput);
std::pair<StringRef, StringRef> Split = Name.split('.');
std::string TmpName = GetTemporaryPath(
Split.first, types::getTypeTempSuffix(JA.getType(), IsCLMode()));
return C.addTempFile(C.getArgs().MakeArgString(TmpName));
}
SmallString<128> BasePath(BaseInput);
StringRef BaseName;
// Dsymutil actions should use the full path.
if (isa<DsymutilJobAction>(JA) || isa<VerifyJobAction>(JA))
BaseName = BasePath;
else
BaseName = llvm::sys::path::filename(BasePath);
// Determine what the derived output name should be.
const char *NamedOutput;
if ((JA.getType() == types::TY_Object || JA.getType() == types::TY_LTO_BC) &&
C.getArgs().hasArg(options::OPT__SLASH_Fo, options::OPT__SLASH_o)) {
// The /Fo or /o flag decides the object filename.
StringRef Val =
C.getArgs()
.getLastArg(options::OPT__SLASH_Fo, options::OPT__SLASH_o)
->getValue();
NamedOutput =
MakeCLOutputFilename(C.getArgs(), Val, BaseName, types::TY_Object);
} else if (JA.getType() == types::TY_Image &&
C.getArgs().hasArg(options::OPT__SLASH_Fe,
options::OPT__SLASH_o)) {
// The /Fe or /o flag names the linked file.
StringRef Val =
C.getArgs()
.getLastArg(options::OPT__SLASH_Fe, options::OPT__SLASH_o)
->getValue();
NamedOutput =
MakeCLOutputFilename(C.getArgs(), Val, BaseName, types::TY_Image);
} else if (JA.getType() == types::TY_Image) {
if (IsCLMode()) {
// clang-cl uses BaseName for the executable name.
NamedOutput =
MakeCLOutputFilename(C.getArgs(), "", BaseName, types::TY_Image);
} else {
SmallString<128> Output(getDefaultImageName());
Output += OffloadingPrefix;
if (MultipleArchs && !BoundArch.empty()) {
Output += "-";
Output.append(BoundArch);
}
NamedOutput = C.getArgs().MakeArgString(Output.c_str());
clang-cl: Implement initial limited support for precompiled headers. In the gcc precompiled header model, one explicitly runs clang with `-x c++-header` on a .h file to produce a gch file, and then includes the header with `-include foo.h` and if a .gch file exists for that header it gets used. This is documented at http://clang.llvm.org/docs/UsersManual.html#precompiled-headers cl.exe's model is fairly different, and controlled by the two flags /Yc and /Yu. A pch file is generated as a side effect of a regular compilation when /Ycheader.h is passed. While the compilation is running, the compiler keeps track of #include lines in the main translation unit and writes everything up to an `#include "header.h"` line into a pch file. Conversely, /Yuheader.h tells the compiler to skip all code in the main TU up to and including `#include "header.h"` and instead load header.pch. (It's also possible to use /Yc and /Yu without an argument, in that case a `#pragma hrdstop` takes the role of controlling the point where pch ends and real code begins.) This patch implements limited support for this in that it requires the pch header to be passed as a /FI force include flag – with this restriction, it can be implemented almost completely in the driver with fairly small amounts of code. For /Yu, this is trivial, and for /Yc a separate pch action is added that runs before the actual compilation. After r261774, the first failing command makes a compilation stop – this means if the pch fails to build the main compilation won't run, which is what we want. However, in /fallback builds we need to run the main compilation even if the pch build fails so that the main compilation's fallback can run. To achieve this, add a ForceSuccessCommand that pretends that the pch build always succeeded in /fallback builds (the main compilation will then fail to open the pch and run the fallback cl.exe invocation). If /Yc /Yu are used in a setup that clang-cl doesn't implement yet, clang-cl will now emit a "not implemented yet; flag ignored" warning that can be disabled using -Wno-clang-cl-pch. Since clang-cl doesn't yet serialize some important things (most notably `pragma comment(lib, ...)`, this feature is disabled by default and only enabled by an internal driver flag. Once it's more stable, this internal flag will disappear. (The default stdafx.h setup passes stdafx.h as explicit argument to /Yc but not as /FI – instead every single TU has to `#include <stdafx.h>` as first thing it does. Implementing support for this should be possible with the approach in this patch with minimal frontend changes by passing a --stop-at / --start-at flag from the driver to the frontend. This is left for a follow-up. I don't think we ever want to support `#pragma hdrstop`, and supporting it with this approach isn't easy: This approach relies on the driver knowing the pch filename in advance, and `#pragma hdrstop(out.pch)` can set the output filename, so the driver can't know about it in advance.) clang-cl now also honors /Fp and puts pch files in the same spot that cl.exe would put them, but the pch file format is of course incompatible. This has ramifications on /fallback, so /Yc /Yu aren't passed through to cl.exe in /fallback builds. http://reviews.llvm.org/D17695 llvm-svn: 262420
2016-03-02 07:16:44 +08:00
}
} else if (JA.getType() == types::TY_PCH && IsCLMode()) {
NamedOutput = C.getArgs().MakeArgString(GetClPchPath(C, BaseName));
} else {
const char *Suffix = types::getTypeTempSuffix(JA.getType(), IsCLMode());
assert(Suffix && "All types used for output should have a suffix.");
std::string::size_type End = std::string::npos;
if (!types::appendSuffixForType(JA.getType()))
End = BaseName.rfind('.');
SmallString<128> Suffixed(BaseName.substr(0, End));
Suffixed += OffloadingPrefix;
if (MultipleArchs && !BoundArch.empty()) {
Suffixed += "-";
Suffixed.append(BoundArch);
}
// When using both -save-temps and -emit-llvm, use a ".tmp.bc" suffix for
// the unoptimized bitcode so that it does not get overwritten by the ".bc"
// optimized bitcode output.
if (!AtTopLevel && C.getArgs().hasArg(options::OPT_emit_llvm) &&
JA.getType() == types::TY_LLVM_BC)
Suffixed += ".tmp";
Suffixed += '.';
Suffixed += Suffix;
NamedOutput = C.getArgs().MakeArgString(Suffixed.c_str());
}
// Prepend object file path if -save-temps=obj
if (!AtTopLevel && isSaveTempsObj() && C.getArgs().hasArg(options::OPT_o) &&
JA.getType() != types::TY_PCH) {
Arg *FinalOutput = C.getArgs().getLastArg(options::OPT_o);
SmallString<128> TempPath(FinalOutput->getValue());
llvm::sys::path::remove_filename(TempPath);
StringRef OutputFileName = llvm::sys::path::filename(NamedOutput);
llvm::sys::path::append(TempPath, OutputFileName);
NamedOutput = C.getArgs().MakeArgString(TempPath.c_str());
}
// If we're saving temps and the temp file conflicts with the input file,
// then avoid overwriting input file.
if (!AtTopLevel && isSaveTempsEnabled() && NamedOutput == BaseName) {
bool SameFile = false;
SmallString<256> Result;
llvm::sys::fs::current_path(Result);
llvm::sys::path::append(Result, BaseName);
llvm::sys::fs::equivalent(BaseInput, Result.c_str(), SameFile);
// Must share the same path to conflict.
if (SameFile) {
StringRef Name = llvm::sys::path::filename(BaseInput);
std::pair<StringRef, StringRef> Split = Name.split('.');
std::string TmpName = GetTemporaryPath(
Split.first, types::getTypeTempSuffix(JA.getType(), IsCLMode()));
return C.addTempFile(C.getArgs().MakeArgString(TmpName));
}
}
// As an annoying special case, PCH generation doesn't strip the pathname.
if (JA.getType() == types::TY_PCH && !IsCLMode()) {
llvm::sys::path::remove_filename(BasePath);
if (BasePath.empty())
BasePath = NamedOutput;
else
llvm::sys::path::append(BasePath, NamedOutput);
return C.addResultFile(C.getArgs().MakeArgString(BasePath.c_str()), &JA);
} else {
return C.addResultFile(NamedOutput, &JA);
}
}
std::string Driver::GetFilePath(StringRef Name, const ToolChain &TC) const {
// Respect a limited subset of the '-Bprefix' functionality in GCC by
2012-10-04 16:08:56 +08:00
// attempting to use this prefix when looking for file paths.
for (const std::string &Dir : PrefixDirs) {
if (Dir.empty())
continue;
SmallString<128> P(Dir[0] == '=' ? SysRoot + Dir.substr(1) : Dir);
llvm::sys::path::append(P, Name);
if (llvm::sys::fs::exists(Twine(P)))
return P.str();
}
SmallString<128> P(ResourceDir);
llvm::sys::path::append(P, Name);
if (llvm::sys::fs::exists(Twine(P)))
return P.str();
for (const std::string &Dir : TC.getFilePaths()) {
if (Dir.empty())
continue;
SmallString<128> P(Dir[0] == '=' ? SysRoot + Dir.substr(1) : Dir);
llvm::sys::path::append(P, Name);
if (llvm::sys::fs::exists(Twine(P)))
return P.str();
}
return Name;
}
void Driver::generatePrefixedToolNames(
StringRef Tool, const ToolChain &TC,
SmallVectorImpl<std::string> &Names) const {
// FIXME: Needs a better variable than DefaultTargetTriple
Names.emplace_back((DefaultTargetTriple + "-" + Tool).str());
Names.emplace_back(Tool);
// Allow the discovery of tools prefixed with LLVM's default target triple.
std::string LLVMDefaultTargetTriple = llvm::sys::getDefaultTargetTriple();
if (LLVMDefaultTargetTriple != DefaultTargetTriple)
Names.emplace_back((LLVMDefaultTargetTriple + "-" + Tool).str());
}
static bool ScanDirForExecutable(SmallString<128> &Dir,
ArrayRef<std::string> Names) {
for (const auto &Name : Names) {
llvm::sys::path::append(Dir, Name);
if (llvm::sys::fs::can_execute(Twine(Dir)))
return true;
llvm::sys::path::remove_filename(Dir);
}
return false;
}
std::string Driver::GetProgramPath(StringRef Name, const ToolChain &TC) const {
SmallVector<std::string, 2> TargetSpecificExecutables;
generatePrefixedToolNames(Name, TC, TargetSpecificExecutables);
// Respect a limited subset of the '-Bprefix' functionality in GCC by
2012-10-04 16:08:56 +08:00
// attempting to use this prefix when looking for program paths.
for (const auto &PrefixDir : PrefixDirs) {
if (llvm::sys::fs::is_directory(PrefixDir)) {
SmallString<128> P(PrefixDir);
if (ScanDirForExecutable(P, TargetSpecificExecutables))
return P.str();
} else {
SmallString<128> P((PrefixDir + Name).str());
if (llvm::sys::fs::can_execute(Twine(P)))
return P.str();
}
}
const ToolChain::path_list &List = TC.getProgramPaths();
for (const auto &Path : List) {
SmallString<128> P(Path);
if (ScanDirForExecutable(P, TargetSpecificExecutables))
return P.str();
}
// If all else failed, search the path.
2014-11-08 05:30:32 +08:00
for (const auto &TargetSpecificExecutable : TargetSpecificExecutables)
if (llvm::ErrorOr<std::string> P =
llvm::sys::findProgramByName(TargetSpecificExecutable))
return *P;
return Name;
}
std::string Driver::GetTemporaryPath(StringRef Prefix, StringRef Suffix) const {
SmallString<128> Path;
std::error_code EC = llvm::sys::fs::createTemporaryFile(Prefix, Suffix, Path);
if (EC) {
Diag(clang::diag::err_unable_to_make_temp) << EC.message();
return "";
}
return Path.str();
clang-cl: Implement initial limited support for precompiled headers. In the gcc precompiled header model, one explicitly runs clang with `-x c++-header` on a .h file to produce a gch file, and then includes the header with `-include foo.h` and if a .gch file exists for that header it gets used. This is documented at http://clang.llvm.org/docs/UsersManual.html#precompiled-headers cl.exe's model is fairly different, and controlled by the two flags /Yc and /Yu. A pch file is generated as a side effect of a regular compilation when /Ycheader.h is passed. While the compilation is running, the compiler keeps track of #include lines in the main translation unit and writes everything up to an `#include "header.h"` line into a pch file. Conversely, /Yuheader.h tells the compiler to skip all code in the main TU up to and including `#include "header.h"` and instead load header.pch. (It's also possible to use /Yc and /Yu without an argument, in that case a `#pragma hrdstop` takes the role of controlling the point where pch ends and real code begins.) This patch implements limited support for this in that it requires the pch header to be passed as a /FI force include flag – with this restriction, it can be implemented almost completely in the driver with fairly small amounts of code. For /Yu, this is trivial, and for /Yc a separate pch action is added that runs before the actual compilation. After r261774, the first failing command makes a compilation stop – this means if the pch fails to build the main compilation won't run, which is what we want. However, in /fallback builds we need to run the main compilation even if the pch build fails so that the main compilation's fallback can run. To achieve this, add a ForceSuccessCommand that pretends that the pch build always succeeded in /fallback builds (the main compilation will then fail to open the pch and run the fallback cl.exe invocation). If /Yc /Yu are used in a setup that clang-cl doesn't implement yet, clang-cl will now emit a "not implemented yet; flag ignored" warning that can be disabled using -Wno-clang-cl-pch. Since clang-cl doesn't yet serialize some important things (most notably `pragma comment(lib, ...)`, this feature is disabled by default and only enabled by an internal driver flag. Once it's more stable, this internal flag will disappear. (The default stdafx.h setup passes stdafx.h as explicit argument to /Yc but not as /FI – instead every single TU has to `#include <stdafx.h>` as first thing it does. Implementing support for this should be possible with the approach in this patch with minimal frontend changes by passing a --stop-at / --start-at flag from the driver to the frontend. This is left for a follow-up. I don't think we ever want to support `#pragma hdrstop`, and supporting it with this approach isn't easy: This approach relies on the driver knowing the pch filename in advance, and `#pragma hdrstop(out.pch)` can set the output filename, so the driver can't know about it in advance.) clang-cl now also honors /Fp and puts pch files in the same spot that cl.exe would put them, but the pch file format is of course incompatible. This has ramifications on /fallback, so /Yc /Yu aren't passed through to cl.exe in /fallback builds. http://reviews.llvm.org/D17695 llvm-svn: 262420
2016-03-02 07:16:44 +08:00
}
std::string Driver::GetClPchPath(Compilation &C, StringRef BaseName) const {
SmallString<128> Output;
if (Arg *FpArg = C.getArgs().getLastArg(options::OPT__SLASH_Fp)) {
// FIXME: If anybody needs it, implement this obscure rule:
// "If you specify a directory without a file name, the default file name
// is VCx0.pch., where x is the major version of Visual C++ in use."
Output = FpArg->getValue();
// "If you do not specify an extension as part of the path name, an
// extension of .pch is assumed. "
if (!llvm::sys::path::has_extension(Output))
Output += ".pch";
} else {
Output = BaseName;
llvm::sys::path::replace_extension(Output, ".pch");
}
return Output.str();
}
Delete the driver's HostInfo class. This abstraction just never really did anything. The two big pieces of functionality it tried to provide was to cache the ToolChain objects for each target, and to figure out the exact target based on the flag set coming in to an invocation. However, it had a lot of flaws even with those goals: - Neither of these have anything to do with the host, or its info. - The HostInfo class was setup as a full blown class *hierarchy* with a separate implementation for each "host" OS. This required dispatching just to create the objects in the first place. - The hierarchy claimed to represent the host, when in fact it was based on the target OS. - Each leaf in the hierarchy was responsible for implementing the flag processing and caching, resulting in a *lot* of copy-paste code and quite a few bugs. - The caching was consistently done based on architecture alone, even though *any* aspect of the targeted triple might change the behavior of the configured toolchain. - Flag processing was already being done in the Driver proper, separating the flag handling even more than it already is. Instead of this, we can simply have the dispatch logic in the Driver which previously created a HostInfo object create the ToolChain objects. Adding caching in the Driver layer is a tiny amount of code. Finally, pulling the flag processing into the Driver puts it where it belongs and consolidates it in one location. The result is that two functions, and maybe 100 lines of new code replace over 10 classes and 800 lines of code. Woot. This also paves the way to introduce more detailed ToolChain objects for various OSes without threading through a new HostInfo type as well, and the accompanying boiler plate. That, of course, was the yak I started to shave that began this entire refactoring escapade. Wheee! llvm-svn: 148950
2012-01-25 19:01:57 +08:00
const ToolChain &Driver::getToolChain(const ArgList &Args,
const llvm::Triple &Target) const {
Delete the driver's HostInfo class. This abstraction just never really did anything. The two big pieces of functionality it tried to provide was to cache the ToolChain objects for each target, and to figure out the exact target based on the flag set coming in to an invocation. However, it had a lot of flaws even with those goals: - Neither of these have anything to do with the host, or its info. - The HostInfo class was setup as a full blown class *hierarchy* with a separate implementation for each "host" OS. This required dispatching just to create the objects in the first place. - The hierarchy claimed to represent the host, when in fact it was based on the target OS. - Each leaf in the hierarchy was responsible for implementing the flag processing and caching, resulting in a *lot* of copy-paste code and quite a few bugs. - The caching was consistently done based on architecture alone, even though *any* aspect of the targeted triple might change the behavior of the configured toolchain. - Flag processing was already being done in the Driver proper, separating the flag handling even more than it already is. Instead of this, we can simply have the dispatch logic in the Driver which previously created a HostInfo object create the ToolChain objects. Adding caching in the Driver layer is a tiny amount of code. Finally, pulling the flag processing into the Driver puts it where it belongs and consolidates it in one location. The result is that two functions, and maybe 100 lines of new code replace over 10 classes and 800 lines of code. Woot. This also paves the way to introduce more detailed ToolChain objects for various OSes without threading through a new HostInfo type as well, and the accompanying boiler plate. That, of course, was the yak I started to shave that began this entire refactoring escapade. Wheee! llvm-svn: 148950
2012-01-25 19:01:57 +08:00
auto &TC = ToolChains[Target.str()];
Delete the driver's HostInfo class. This abstraction just never really did anything. The two big pieces of functionality it tried to provide was to cache the ToolChain objects for each target, and to figure out the exact target based on the flag set coming in to an invocation. However, it had a lot of flaws even with those goals: - Neither of these have anything to do with the host, or its info. - The HostInfo class was setup as a full blown class *hierarchy* with a separate implementation for each "host" OS. This required dispatching just to create the objects in the first place. - The hierarchy claimed to represent the host, when in fact it was based on the target OS. - Each leaf in the hierarchy was responsible for implementing the flag processing and caching, resulting in a *lot* of copy-paste code and quite a few bugs. - The caching was consistently done based on architecture alone, even though *any* aspect of the targeted triple might change the behavior of the configured toolchain. - Flag processing was already being done in the Driver proper, separating the flag handling even more than it already is. Instead of this, we can simply have the dispatch logic in the Driver which previously created a HostInfo object create the ToolChain objects. Adding caching in the Driver layer is a tiny amount of code. Finally, pulling the flag processing into the Driver puts it where it belongs and consolidates it in one location. The result is that two functions, and maybe 100 lines of new code replace over 10 classes and 800 lines of code. Woot. This also paves the way to introduce more detailed ToolChain objects for various OSes without threading through a new HostInfo type as well, and the accompanying boiler plate. That, of course, was the yak I started to shave that began this entire refactoring escapade. Wheee! llvm-svn: 148950
2012-01-25 19:01:57 +08:00
if (!TC) {
switch (Target.getOS()) {
case llvm::Triple::Haiku:
TC = llvm::make_unique<toolchains::Haiku>(*this, Target, Args);
break;
case llvm::Triple::CloudABI:
TC = llvm::make_unique<toolchains::CloudABI>(*this, Target, Args);
break;
Delete the driver's HostInfo class. This abstraction just never really did anything. The two big pieces of functionality it tried to provide was to cache the ToolChain objects for each target, and to figure out the exact target based on the flag set coming in to an invocation. However, it had a lot of flaws even with those goals: - Neither of these have anything to do with the host, or its info. - The HostInfo class was setup as a full blown class *hierarchy* with a separate implementation for each "host" OS. This required dispatching just to create the objects in the first place. - The hierarchy claimed to represent the host, when in fact it was based on the target OS. - Each leaf in the hierarchy was responsible for implementing the flag processing and caching, resulting in a *lot* of copy-paste code and quite a few bugs. - The caching was consistently done based on architecture alone, even though *any* aspect of the targeted triple might change the behavior of the configured toolchain. - Flag processing was already being done in the Driver proper, separating the flag handling even more than it already is. Instead of this, we can simply have the dispatch logic in the Driver which previously created a HostInfo object create the ToolChain objects. Adding caching in the Driver layer is a tiny amount of code. Finally, pulling the flag processing into the Driver puts it where it belongs and consolidates it in one location. The result is that two functions, and maybe 100 lines of new code replace over 10 classes and 800 lines of code. Woot. This also paves the way to introduce more detailed ToolChain objects for various OSes without threading through a new HostInfo type as well, and the accompanying boiler plate. That, of course, was the yak I started to shave that began this entire refactoring escapade. Wheee! llvm-svn: 148950
2012-01-25 19:01:57 +08:00
case llvm::Triple::Darwin:
case llvm::Triple::MacOSX:
case llvm::Triple::IOS:
case llvm::Triple::TvOS:
case llvm::Triple::WatchOS:
TC = llvm::make_unique<toolchains::DarwinClang>(*this, Target, Args);
Delete the driver's HostInfo class. This abstraction just never really did anything. The two big pieces of functionality it tried to provide was to cache the ToolChain objects for each target, and to figure out the exact target based on the flag set coming in to an invocation. However, it had a lot of flaws even with those goals: - Neither of these have anything to do with the host, or its info. - The HostInfo class was setup as a full blown class *hierarchy* with a separate implementation for each "host" OS. This required dispatching just to create the objects in the first place. - The hierarchy claimed to represent the host, when in fact it was based on the target OS. - Each leaf in the hierarchy was responsible for implementing the flag processing and caching, resulting in a *lot* of copy-paste code and quite a few bugs. - The caching was consistently done based on architecture alone, even though *any* aspect of the targeted triple might change the behavior of the configured toolchain. - Flag processing was already being done in the Driver proper, separating the flag handling even more than it already is. Instead of this, we can simply have the dispatch logic in the Driver which previously created a HostInfo object create the ToolChain objects. Adding caching in the Driver layer is a tiny amount of code. Finally, pulling the flag processing into the Driver puts it where it belongs and consolidates it in one location. The result is that two functions, and maybe 100 lines of new code replace over 10 classes and 800 lines of code. Woot. This also paves the way to introduce more detailed ToolChain objects for various OSes without threading through a new HostInfo type as well, and the accompanying boiler plate. That, of course, was the yak I started to shave that began this entire refactoring escapade. Wheee! llvm-svn: 148950
2012-01-25 19:01:57 +08:00
break;
case llvm::Triple::DragonFly:
TC = llvm::make_unique<toolchains::DragonFly>(*this, Target, Args);
Delete the driver's HostInfo class. This abstraction just never really did anything. The two big pieces of functionality it tried to provide was to cache the ToolChain objects for each target, and to figure out the exact target based on the flag set coming in to an invocation. However, it had a lot of flaws even with those goals: - Neither of these have anything to do with the host, or its info. - The HostInfo class was setup as a full blown class *hierarchy* with a separate implementation for each "host" OS. This required dispatching just to create the objects in the first place. - The hierarchy claimed to represent the host, when in fact it was based on the target OS. - Each leaf in the hierarchy was responsible for implementing the flag processing and caching, resulting in a *lot* of copy-paste code and quite a few bugs. - The caching was consistently done based on architecture alone, even though *any* aspect of the targeted triple might change the behavior of the configured toolchain. - Flag processing was already being done in the Driver proper, separating the flag handling even more than it already is. Instead of this, we can simply have the dispatch logic in the Driver which previously created a HostInfo object create the ToolChain objects. Adding caching in the Driver layer is a tiny amount of code. Finally, pulling the flag processing into the Driver puts it where it belongs and consolidates it in one location. The result is that two functions, and maybe 100 lines of new code replace over 10 classes and 800 lines of code. Woot. This also paves the way to introduce more detailed ToolChain objects for various OSes without threading through a new HostInfo type as well, and the accompanying boiler plate. That, of course, was the yak I started to shave that began this entire refactoring escapade. Wheee! llvm-svn: 148950
2012-01-25 19:01:57 +08:00
break;
case llvm::Triple::OpenBSD:
TC = llvm::make_unique<toolchains::OpenBSD>(*this, Target, Args);
Delete the driver's HostInfo class. This abstraction just never really did anything. The two big pieces of functionality it tried to provide was to cache the ToolChain objects for each target, and to figure out the exact target based on the flag set coming in to an invocation. However, it had a lot of flaws even with those goals: - Neither of these have anything to do with the host, or its info. - The HostInfo class was setup as a full blown class *hierarchy* with a separate implementation for each "host" OS. This required dispatching just to create the objects in the first place. - The hierarchy claimed to represent the host, when in fact it was based on the target OS. - Each leaf in the hierarchy was responsible for implementing the flag processing and caching, resulting in a *lot* of copy-paste code and quite a few bugs. - The caching was consistently done based on architecture alone, even though *any* aspect of the targeted triple might change the behavior of the configured toolchain. - Flag processing was already being done in the Driver proper, separating the flag handling even more than it already is. Instead of this, we can simply have the dispatch logic in the Driver which previously created a HostInfo object create the ToolChain objects. Adding caching in the Driver layer is a tiny amount of code. Finally, pulling the flag processing into the Driver puts it where it belongs and consolidates it in one location. The result is that two functions, and maybe 100 lines of new code replace over 10 classes and 800 lines of code. Woot. This also paves the way to introduce more detailed ToolChain objects for various OSes without threading through a new HostInfo type as well, and the accompanying boiler plate. That, of course, was the yak I started to shave that began this entire refactoring escapade. Wheee! llvm-svn: 148950
2012-01-25 19:01:57 +08:00
break;
case llvm::Triple::Bitrig:
TC = llvm::make_unique<toolchains::Bitrig>(*this, Target, Args);
break;
Delete the driver's HostInfo class. This abstraction just never really did anything. The two big pieces of functionality it tried to provide was to cache the ToolChain objects for each target, and to figure out the exact target based on the flag set coming in to an invocation. However, it had a lot of flaws even with those goals: - Neither of these have anything to do with the host, or its info. - The HostInfo class was setup as a full blown class *hierarchy* with a separate implementation for each "host" OS. This required dispatching just to create the objects in the first place. - The hierarchy claimed to represent the host, when in fact it was based on the target OS. - Each leaf in the hierarchy was responsible for implementing the flag processing and caching, resulting in a *lot* of copy-paste code and quite a few bugs. - The caching was consistently done based on architecture alone, even though *any* aspect of the targeted triple might change the behavior of the configured toolchain. - Flag processing was already being done in the Driver proper, separating the flag handling even more than it already is. Instead of this, we can simply have the dispatch logic in the Driver which previously created a HostInfo object create the ToolChain objects. Adding caching in the Driver layer is a tiny amount of code. Finally, pulling the flag processing into the Driver puts it where it belongs and consolidates it in one location. The result is that two functions, and maybe 100 lines of new code replace over 10 classes and 800 lines of code. Woot. This also paves the way to introduce more detailed ToolChain objects for various OSes without threading through a new HostInfo type as well, and the accompanying boiler plate. That, of course, was the yak I started to shave that began this entire refactoring escapade. Wheee! llvm-svn: 148950
2012-01-25 19:01:57 +08:00
case llvm::Triple::NetBSD:
TC = llvm::make_unique<toolchains::NetBSD>(*this, Target, Args);
Delete the driver's HostInfo class. This abstraction just never really did anything. The two big pieces of functionality it tried to provide was to cache the ToolChain objects for each target, and to figure out the exact target based on the flag set coming in to an invocation. However, it had a lot of flaws even with those goals: - Neither of these have anything to do with the host, or its info. - The HostInfo class was setup as a full blown class *hierarchy* with a separate implementation for each "host" OS. This required dispatching just to create the objects in the first place. - The hierarchy claimed to represent the host, when in fact it was based on the target OS. - Each leaf in the hierarchy was responsible for implementing the flag processing and caching, resulting in a *lot* of copy-paste code and quite a few bugs. - The caching was consistently done based on architecture alone, even though *any* aspect of the targeted triple might change the behavior of the configured toolchain. - Flag processing was already being done in the Driver proper, separating the flag handling even more than it already is. Instead of this, we can simply have the dispatch logic in the Driver which previously created a HostInfo object create the ToolChain objects. Adding caching in the Driver layer is a tiny amount of code. Finally, pulling the flag processing into the Driver puts it where it belongs and consolidates it in one location. The result is that two functions, and maybe 100 lines of new code replace over 10 classes and 800 lines of code. Woot. This also paves the way to introduce more detailed ToolChain objects for various OSes without threading through a new HostInfo type as well, and the accompanying boiler plate. That, of course, was the yak I started to shave that began this entire refactoring escapade. Wheee! llvm-svn: 148950
2012-01-25 19:01:57 +08:00
break;
case llvm::Triple::FreeBSD:
TC = llvm::make_unique<toolchains::FreeBSD>(*this, Target, Args);
Delete the driver's HostInfo class. This abstraction just never really did anything. The two big pieces of functionality it tried to provide was to cache the ToolChain objects for each target, and to figure out the exact target based on the flag set coming in to an invocation. However, it had a lot of flaws even with those goals: - Neither of these have anything to do with the host, or its info. - The HostInfo class was setup as a full blown class *hierarchy* with a separate implementation for each "host" OS. This required dispatching just to create the objects in the first place. - The hierarchy claimed to represent the host, when in fact it was based on the target OS. - Each leaf in the hierarchy was responsible for implementing the flag processing and caching, resulting in a *lot* of copy-paste code and quite a few bugs. - The caching was consistently done based on architecture alone, even though *any* aspect of the targeted triple might change the behavior of the configured toolchain. - Flag processing was already being done in the Driver proper, separating the flag handling even more than it already is. Instead of this, we can simply have the dispatch logic in the Driver which previously created a HostInfo object create the ToolChain objects. Adding caching in the Driver layer is a tiny amount of code. Finally, pulling the flag processing into the Driver puts it where it belongs and consolidates it in one location. The result is that two functions, and maybe 100 lines of new code replace over 10 classes and 800 lines of code. Woot. This also paves the way to introduce more detailed ToolChain objects for various OSes without threading through a new HostInfo type as well, and the accompanying boiler plate. That, of course, was the yak I started to shave that began this entire refactoring escapade. Wheee! llvm-svn: 148950
2012-01-25 19:01:57 +08:00
break;
case llvm::Triple::Minix:
TC = llvm::make_unique<toolchains::Minix>(*this, Target, Args);
Delete the driver's HostInfo class. This abstraction just never really did anything. The two big pieces of functionality it tried to provide was to cache the ToolChain objects for each target, and to figure out the exact target based on the flag set coming in to an invocation. However, it had a lot of flaws even with those goals: - Neither of these have anything to do with the host, or its info. - The HostInfo class was setup as a full blown class *hierarchy* with a separate implementation for each "host" OS. This required dispatching just to create the objects in the first place. - The hierarchy claimed to represent the host, when in fact it was based on the target OS. - Each leaf in the hierarchy was responsible for implementing the flag processing and caching, resulting in a *lot* of copy-paste code and quite a few bugs. - The caching was consistently done based on architecture alone, even though *any* aspect of the targeted triple might change the behavior of the configured toolchain. - Flag processing was already being done in the Driver proper, separating the flag handling even more than it already is. Instead of this, we can simply have the dispatch logic in the Driver which previously created a HostInfo object create the ToolChain objects. Adding caching in the Driver layer is a tiny amount of code. Finally, pulling the flag processing into the Driver puts it where it belongs and consolidates it in one location. The result is that two functions, and maybe 100 lines of new code replace over 10 classes and 800 lines of code. Woot. This also paves the way to introduce more detailed ToolChain objects for various OSes without threading through a new HostInfo type as well, and the accompanying boiler plate. That, of course, was the yak I started to shave that began this entire refactoring escapade. Wheee! llvm-svn: 148950
2012-01-25 19:01:57 +08:00
break;
case llvm::Triple::Linux:
case llvm::Triple::ELFIAMCU:
if (Target.getArch() == llvm::Triple::hexagon)
TC = llvm::make_unique<toolchains::HexagonToolChain>(*this, Target,
Args);
else if ((Target.getVendor() == llvm::Triple::MipsTechnologies) &&
!Target.hasEnvironment())
TC = llvm::make_unique<toolchains::MipsLLVMToolChain>(*this, Target,
Args);
else
TC = llvm::make_unique<toolchains::Linux>(*this, Target, Args);
Delete the driver's HostInfo class. This abstraction just never really did anything. The two big pieces of functionality it tried to provide was to cache the ToolChain objects for each target, and to figure out the exact target based on the flag set coming in to an invocation. However, it had a lot of flaws even with those goals: - Neither of these have anything to do with the host, or its info. - The HostInfo class was setup as a full blown class *hierarchy* with a separate implementation for each "host" OS. This required dispatching just to create the objects in the first place. - The hierarchy claimed to represent the host, when in fact it was based on the target OS. - Each leaf in the hierarchy was responsible for implementing the flag processing and caching, resulting in a *lot* of copy-paste code and quite a few bugs. - The caching was consistently done based on architecture alone, even though *any* aspect of the targeted triple might change the behavior of the configured toolchain. - Flag processing was already being done in the Driver proper, separating the flag handling even more than it already is. Instead of this, we can simply have the dispatch logic in the Driver which previously created a HostInfo object create the ToolChain objects. Adding caching in the Driver layer is a tiny amount of code. Finally, pulling the flag processing into the Driver puts it where it belongs and consolidates it in one location. The result is that two functions, and maybe 100 lines of new code replace over 10 classes and 800 lines of code. Woot. This also paves the way to introduce more detailed ToolChain objects for various OSes without threading through a new HostInfo type as well, and the accompanying boiler plate. That, of course, was the yak I started to shave that began this entire refactoring escapade. Wheee! llvm-svn: 148950
2012-01-25 19:01:57 +08:00
break;
case llvm::Triple::NaCl:
TC = llvm::make_unique<toolchains::NaClToolChain>(*this, Target, Args);
break;
case llvm::Triple::Fuchsia:
TC = llvm::make_unique<toolchains::Fuchsia>(*this, Target, Args);
break;
case llvm::Triple::Solaris:
TC = llvm::make_unique<toolchains::Solaris>(*this, Target, Args);
break;
case llvm::Triple::AMDHSA:
TC = llvm::make_unique<toolchains::AMDGPUToolChain>(*this, Target, Args);
break;
Delete the driver's HostInfo class. This abstraction just never really did anything. The two big pieces of functionality it tried to provide was to cache the ToolChain objects for each target, and to figure out the exact target based on the flag set coming in to an invocation. However, it had a lot of flaws even with those goals: - Neither of these have anything to do with the host, or its info. - The HostInfo class was setup as a full blown class *hierarchy* with a separate implementation for each "host" OS. This required dispatching just to create the objects in the first place. - The hierarchy claimed to represent the host, when in fact it was based on the target OS. - Each leaf in the hierarchy was responsible for implementing the flag processing and caching, resulting in a *lot* of copy-paste code and quite a few bugs. - The caching was consistently done based on architecture alone, even though *any* aspect of the targeted triple might change the behavior of the configured toolchain. - Flag processing was already being done in the Driver proper, separating the flag handling even more than it already is. Instead of this, we can simply have the dispatch logic in the Driver which previously created a HostInfo object create the ToolChain objects. Adding caching in the Driver layer is a tiny amount of code. Finally, pulling the flag processing into the Driver puts it where it belongs and consolidates it in one location. The result is that two functions, and maybe 100 lines of new code replace over 10 classes and 800 lines of code. Woot. This also paves the way to introduce more detailed ToolChain objects for various OSes without threading through a new HostInfo type as well, and the accompanying boiler plate. That, of course, was the yak I started to shave that began this entire refactoring escapade. Wheee! llvm-svn: 148950
2012-01-25 19:01:57 +08:00
case llvm::Triple::Win32:
switch (Target.getEnvironment()) {
default:
if (Target.isOSBinFormatELF())
TC = llvm::make_unique<toolchains::Generic_ELF>(*this, Target, Args);
else if (Target.isOSBinFormatMachO())
TC = llvm::make_unique<toolchains::MachO>(*this, Target, Args);
else
TC = llvm::make_unique<toolchains::Generic_GCC>(*this, Target, Args);
break;
case llvm::Triple::GNU:
TC = llvm::make_unique<toolchains::MinGW>(*this, Target, Args);
break;
case llvm::Triple::Itanium:
TC = llvm::make_unique<toolchains::CrossWindowsToolChain>(*this, Target,
Args);
break;
case llvm::Triple::MSVC:
case llvm::Triple::UnknownEnvironment:
TC = llvm::make_unique<toolchains::MSVCToolChain>(*this, Target, Args);
break;
}
Delete the driver's HostInfo class. This abstraction just never really did anything. The two big pieces of functionality it tried to provide was to cache the ToolChain objects for each target, and to figure out the exact target based on the flag set coming in to an invocation. However, it had a lot of flaws even with those goals: - Neither of these have anything to do with the host, or its info. - The HostInfo class was setup as a full blown class *hierarchy* with a separate implementation for each "host" OS. This required dispatching just to create the objects in the first place. - The hierarchy claimed to represent the host, when in fact it was based on the target OS. - Each leaf in the hierarchy was responsible for implementing the flag processing and caching, resulting in a *lot* of copy-paste code and quite a few bugs. - The caching was consistently done based on architecture alone, even though *any* aspect of the targeted triple might change the behavior of the configured toolchain. - Flag processing was already being done in the Driver proper, separating the flag handling even more than it already is. Instead of this, we can simply have the dispatch logic in the Driver which previously created a HostInfo object create the ToolChain objects. Adding caching in the Driver layer is a tiny amount of code. Finally, pulling the flag processing into the Driver puts it where it belongs and consolidates it in one location. The result is that two functions, and maybe 100 lines of new code replace over 10 classes and 800 lines of code. Woot. This also paves the way to introduce more detailed ToolChain objects for various OSes without threading through a new HostInfo type as well, and the accompanying boiler plate. That, of course, was the yak I started to shave that began this entire refactoring escapade. Wheee! llvm-svn: 148950
2012-01-25 19:01:57 +08:00
break;
case llvm::Triple::PS4:
TC = llvm::make_unique<toolchains::PS4CPU>(*this, Target, Args);
break;
case llvm::Triple::Contiki:
TC = llvm::make_unique<toolchains::Contiki>(*this, Target, Args);
break;
Delete the driver's HostInfo class. This abstraction just never really did anything. The two big pieces of functionality it tried to provide was to cache the ToolChain objects for each target, and to figure out the exact target based on the flag set coming in to an invocation. However, it had a lot of flaws even with those goals: - Neither of these have anything to do with the host, or its info. - The HostInfo class was setup as a full blown class *hierarchy* with a separate implementation for each "host" OS. This required dispatching just to create the objects in the first place. - The hierarchy claimed to represent the host, when in fact it was based on the target OS. - Each leaf in the hierarchy was responsible for implementing the flag processing and caching, resulting in a *lot* of copy-paste code and quite a few bugs. - The caching was consistently done based on architecture alone, even though *any* aspect of the targeted triple might change the behavior of the configured toolchain. - Flag processing was already being done in the Driver proper, separating the flag handling even more than it already is. Instead of this, we can simply have the dispatch logic in the Driver which previously created a HostInfo object create the ToolChain objects. Adding caching in the Driver layer is a tiny amount of code. Finally, pulling the flag processing into the Driver puts it where it belongs and consolidates it in one location. The result is that two functions, and maybe 100 lines of new code replace over 10 classes and 800 lines of code. Woot. This also paves the way to introduce more detailed ToolChain objects for various OSes without threading through a new HostInfo type as well, and the accompanying boiler plate. That, of course, was the yak I started to shave that began this entire refactoring escapade. Wheee! llvm-svn: 148950
2012-01-25 19:01:57 +08:00
default:
// Of these targets, Hexagon is the only one that might have
// an OS of Linux, in which case it got handled above already.
switch (Target.getArch()) {
case llvm::Triple::tce:
TC = llvm::make_unique<toolchains::TCEToolChain>(*this, Target, Args);
break;
case llvm::Triple::tcele:
TC = llvm::make_unique<toolchains::TCELEToolChain>(*this, Target, Args);
break;
case llvm::Triple::hexagon:
TC = llvm::make_unique<toolchains::HexagonToolChain>(*this, Target,
Args);
break;
case llvm::Triple::lanai:
TC = llvm::make_unique<toolchains::LanaiToolChain>(*this, Target, Args);
break;
case llvm::Triple::xcore:
TC = llvm::make_unique<toolchains::XCoreToolChain>(*this, Target, Args);
break;
case llvm::Triple::wasm32:
case llvm::Triple::wasm64:
TC = llvm::make_unique<toolchains::WebAssembly>(*this, Target, Args);
break;
case llvm::Triple::avr:
TC = llvm::make_unique<toolchains::AVRToolChain>(*this, Target, Args);
break;
default:
if (Target.getVendor() == llvm::Triple::Myriad)
TC = llvm::make_unique<toolchains::MyriadToolChain>(*this, Target,
Args);
else if (Target.isOSBinFormatELF())
TC = llvm::make_unique<toolchains::Generic_ELF>(*this, Target, Args);
else if (Target.isOSBinFormatMachO())
TC = llvm::make_unique<toolchains::MachO>(*this, Target, Args);
else
TC = llvm::make_unique<toolchains::Generic_GCC>(*this, Target, Args);
}
Delete the driver's HostInfo class. This abstraction just never really did anything. The two big pieces of functionality it tried to provide was to cache the ToolChain objects for each target, and to figure out the exact target based on the flag set coming in to an invocation. However, it had a lot of flaws even with those goals: - Neither of these have anything to do with the host, or its info. - The HostInfo class was setup as a full blown class *hierarchy* with a separate implementation for each "host" OS. This required dispatching just to create the objects in the first place. - The hierarchy claimed to represent the host, when in fact it was based on the target OS. - Each leaf in the hierarchy was responsible for implementing the flag processing and caching, resulting in a *lot* of copy-paste code and quite a few bugs. - The caching was consistently done based on architecture alone, even though *any* aspect of the targeted triple might change the behavior of the configured toolchain. - Flag processing was already being done in the Driver proper, separating the flag handling even more than it already is. Instead of this, we can simply have the dispatch logic in the Driver which previously created a HostInfo object create the ToolChain objects. Adding caching in the Driver layer is a tiny amount of code. Finally, pulling the flag processing into the Driver puts it where it belongs and consolidates it in one location. The result is that two functions, and maybe 100 lines of new code replace over 10 classes and 800 lines of code. Woot. This also paves the way to introduce more detailed ToolChain objects for various OSes without threading through a new HostInfo type as well, and the accompanying boiler plate. That, of course, was the yak I started to shave that began this entire refactoring escapade. Wheee! llvm-svn: 148950
2012-01-25 19:01:57 +08:00
}
}
// Intentionally omitted from the switch above: llvm::Triple::CUDA. CUDA
// compiles always need two toolchains, the CUDA toolchain and the host
// toolchain. So the only valid way to create a CUDA toolchain is via
// CreateOffloadingDeviceToolChains.
Delete the driver's HostInfo class. This abstraction just never really did anything. The two big pieces of functionality it tried to provide was to cache the ToolChain objects for each target, and to figure out the exact target based on the flag set coming in to an invocation. However, it had a lot of flaws even with those goals: - Neither of these have anything to do with the host, or its info. - The HostInfo class was setup as a full blown class *hierarchy* with a separate implementation for each "host" OS. This required dispatching just to create the objects in the first place. - The hierarchy claimed to represent the host, when in fact it was based on the target OS. - Each leaf in the hierarchy was responsible for implementing the flag processing and caching, resulting in a *lot* of copy-paste code and quite a few bugs. - The caching was consistently done based on architecture alone, even though *any* aspect of the targeted triple might change the behavior of the configured toolchain. - Flag processing was already being done in the Driver proper, separating the flag handling even more than it already is. Instead of this, we can simply have the dispatch logic in the Driver which previously created a HostInfo object create the ToolChain objects. Adding caching in the Driver layer is a tiny amount of code. Finally, pulling the flag processing into the Driver puts it where it belongs and consolidates it in one location. The result is that two functions, and maybe 100 lines of new code replace over 10 classes and 800 lines of code. Woot. This also paves the way to introduce more detailed ToolChain objects for various OSes without threading through a new HostInfo type as well, and the accompanying boiler plate. That, of course, was the yak I started to shave that began this entire refactoring escapade. Wheee! llvm-svn: 148950
2012-01-25 19:01:57 +08:00
return *TC;
}
bool Driver::ShouldUseClangCompiler(const JobAction &JA) const {
// Say "no" if there is not exactly one input of a type clang understands.
if (JA.size() != 1 ||
!types::isAcceptedByClang((*JA.input_begin())->getType()))
return false;
// And say "no" if this is not a kind of action clang understands.
if (!isa<PreprocessJobAction>(JA) && !isa<PrecompileJobAction>(JA) &&
!isa<CompileJobAction>(JA) && !isa<BackendJobAction>(JA))
return false;
return true;
}
/// GetReleaseVersion - Parse (([0-9]+)(.([0-9]+)(.([0-9]+)?))?)? and return the
/// grouped values as integers. Numbers which are not provided are set to 0.
///
/// \return True if the entire string was parsed (9.2), or all groups were
/// parsed (10.3.5extrastuff).
bool Driver::GetReleaseVersion(StringRef Str, unsigned &Major, unsigned &Minor,
unsigned &Micro, bool &HadExtra) {
HadExtra = false;
Major = Minor = Micro = 0;
if (Str.empty())
return false;
if (Str.consumeInteger(10, Major))
return false;
if (Str.empty())
return true;
if (Str[0] != '.')
return false;
Str = Str.drop_front(1);
if (Str.consumeInteger(10, Minor))
return false;
if (Str.empty())
return true;
if (Str[0] != '.')
return false;
Str = Str.drop_front(1);
if (Str.consumeInteger(10, Micro))
return false;
if (!Str.empty())
HadExtra = true;
return true;
}
/// Parse digits from a string \p Str and fulfill \p Digits with
/// the parsed numbers. This method assumes that the max number of
/// digits to look for is equal to Digits.size().
///
/// \return True if the entire string was parsed and there are
/// no extra characters remaining at the end.
bool Driver::GetReleaseVersion(StringRef Str,
MutableArrayRef<unsigned> Digits) {
if (Str.empty())
return false;
unsigned CurDigit = 0;
while (CurDigit < Digits.size()) {
unsigned Digit;
if (Str.consumeInteger(10, Digit))
return false;
Digits[CurDigit] = Digit;
if (Str.empty())
return true;
if (Str[0] != '.')
return false;
Str = Str.drop_front(1);
CurDigit++;
}
// More digits than requested, bail out...
return false;
}
std::pair<unsigned, unsigned> Driver::getIncludeExcludeOptionFlagMasks() const {
unsigned IncludedFlagsBitmask = 0;
unsigned ExcludedFlagsBitmask = options::NoDriverOption;
if (Mode == CLMode) {
// Include CL and Core options.
IncludedFlagsBitmask |= options::CLOption;
IncludedFlagsBitmask |= options::CoreOption;
} else {
ExcludedFlagsBitmask |= options::CLOption;
}
return std::make_pair(IncludedFlagsBitmask, ExcludedFlagsBitmask);
}
bool clang::driver::isOptimizationLevelFast(const ArgList &Args) {
return Args.hasFlag(options::OPT_Ofast, options::OPT_O_Group, false);
}