//===--- Driver.cpp - Clang GCC Compatible Driver -------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#include "clang/Driver/Driver.h"
#include "InputInfo.h"
#include "ToolChains.h"
#include "clang/Basic/Version.h"
#include "clang/Basic/VirtualFileSystem.h"
#include "clang/Config/config.h"
#include "clang/Driver/Action.h"
#include "clang/Driver/Compilation.h"
#include "clang/Driver/DriverDiagnostic.h"
#include "clang/Driver/Job.h"
#include "clang/Driver/Options.h"
#include "clang/Driver/SanitizerArgs.h"
#include "clang/Driver/Tool.h"
#include "clang/Driver/ToolChain.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/Option/Arg.h"
#include "llvm/Option/ArgList.h"
#include "llvm/Option/OptSpecifier.h"
#include "llvm/Option/OptTable.h"
#include "llvm/Option/Option.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/PrettyStackTrace.h"
#include "llvm/Support/Process.h"
#include "llvm/Support/Program.h"
#include "llvm/Support/raw_ostream.h"
#include <map>
#include <memory>
#include <utility>
using namespace clang::driver;
using namespace clang;
using namespace llvm::opt;
Driver::Driver(StringRef ClangExecutable, StringRef DefaultTargetTriple,
DiagnosticsEngine &Diags,
IntrusiveRefCntPtr<vfs::FileSystem> VFS)
: Opts(createDriverOptTable()), Diags(Diags), VFS(std::move(VFS)),
Mode(GCCMode), SaveTemps(SaveTempsNone), BitcodeEmbed(EmbedNone),
LTOMode(LTOK_None), ClangExecutable(ClangExecutable),
SysRoot(DEFAULT_SYSROOT), UseStdLib(true),
DriverTitle("clang LLVM compiler"), CCPrintOptionsFilename(nullptr),
CCPrintHeadersFilename(nullptr), CCLogDiagnosticsFilename(nullptr),
CCCPrintBindings(false), CCPrintHeaders(false), CCLogDiagnostics(false),
CCGenDiagnostics(false), DefaultTargetTriple(DefaultTargetTriple),
CCCGenericGCCName(""), CheckInputsExist(true), CCCUsePCH(true),
SuppressMissingInputWarning(false) {
// Provide a sane fallback if no VFS is specified.
if (!this->VFS)
this->VFS = vfs::getRealFileSystem();
Name = llvm::sys::path::filename(ClangExecutable);
Dir = llvm::sys::path::parent_path(ClangExecutable);
InstalledDir = Dir; // Provide a sensible default installed dir.
// Compute the path to the resource directory.
StringRef ClangResourceDir(CLANG_RESOURCE_DIR);
SmallString<128> P(Dir);
if (ClangResourceDir != "") {
llvm::sys::path::append(P, ClangResourceDir);
} else {
StringRef ClangLibdirSuffix(CLANG_LIBDIR_SUFFIX);
llvm::sys::path::append(P, "..", Twine("lib") + ClangLibdirSuffix, "clang",
CLANG_VERSION_STRING);
}
ResourceDir = P.str();
}
Driver::~Driver() {
delete Opts;
llvm::DeleteContainerSeconds(ToolChains);
}
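/// Set the driver mode, first from the mode implied by \p ProgramName and then
/// from any --driver-mode= options in \p Args (the last one wins). Null
/// entries in \p Args, which are response-file EOL markers, are skipped.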
void Driver::ParseDriverMode(StringRef ProgramName,
ArrayRef<const char *> Args) {
auto Default = ToolChain::getTargetAndModeFromProgramName(ProgramName);
StringRef DefaultMode(Default.second);
setDriverModeFromOption(DefaultMode);
for (const char *ArgPtr : Args) {
// Ignore nullptrs, they are response file's EOL markers
if (ArgPtr == nullptr)
continue;
const StringRef Arg = ArgPtr;
setDriverModeFromOption(Arg);
}
}
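/// If \p Opt is a --driver-mode= option, switch Mode to the named driver mode;
/// unknown mode names are diagnosed. Any other option is ignored.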
void Driver::setDriverModeFromOption(StringRef Opt) {
const std::string OptName =
getOpts().getOption(options::OPT_driver_mode).getPrefixedName();
if (!Opt.startswith(OptName))
return;
StringRef Value = Opt.drop_front(OptName.size());
const unsigned M = llvm::StringSwitch<unsigned>(Value)
.Case("gcc", GCCMode)
.Case("g++", GXXMode)
.Case("cpp", CPPMode)
.Case("cl", CLMode)
.Default(~0U);
if (M != ~0U)
Mode = static_cast<DriverMode>(M);
else
Diag(diag::err_drv_unsupported_option_argument) << OptName << Value;
}
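/// Parse \p ArgStrings into an InputArgList, diagnosing missing argument
/// values, unsupported options, empty -mcpu= values, and unknown arguments.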
InputArgList Driver::ParseArgStrings(ArrayRef<const char *> ArgStrings) {
llvm::PrettyStackTraceString CrashInfo("Command line argument parsing");
unsigned IncludedFlagsBitmask;
unsigned ExcludedFlagsBitmask;
std::tie(IncludedFlagsBitmask, ExcludedFlagsBitmask) =
getIncludeExcludeOptionFlagMasks();
unsigned MissingArgIndex, MissingArgCount;
InputArgList Args =
getOpts().ParseArgs(ArgStrings, MissingArgIndex, MissingArgCount,
IncludedFlagsBitmask, ExcludedFlagsBitmask);
// Check for missing argument error.
if (MissingArgCount)
Diag(clang::diag::err_drv_missing_argument)
<< Args.getArgString(MissingArgIndex) << MissingArgCount;
// Check for unsupported options.
for (const Arg *A : Args) {
if (A->getOption().hasFlag(options::Unsupported)) {
Diag(clang::diag::err_drv_unsupported_opt) << A->getAsString(Args);
continue;
}
// Warn about -mcpu= without an argument.
if (A->getOption().matches(options::OPT_mcpu_EQ) && A->containsValue("")) {
Diag(clang::diag::warn_drv_empty_joined_argument) << A->getAsString(Args);
}
}
for (const Arg *A : Args.filtered(options::OPT_UNKNOWN))
Diags.Report(IsCLMode() ? diag::warn_drv_unknown_argument_clang_cl :
diag::err_drv_unknown_argument)
<< A->getAsString(Args);
return Args;
}
// Determine which compilation mode we are in. We look for options which
// affect the phase, starting with the earliest phases, and record which
// option we used to determine the final phase.
phases::ID Driver::getFinalPhase(const DerivedArgList &DAL,
Arg **FinalPhaseArg) const {
Arg *PhaseArg = nullptr;
phases::ID FinalPhase;
// -{E,EP,P,M,MM} only run the preprocessor.
if (CCCIsCPP() || (PhaseArg = DAL.getLastArg(options::OPT_E)) ||
(PhaseArg = DAL.getLastArg(options::OPT__SLASH_EP)) ||
(PhaseArg = DAL.getLastArg(options::OPT_M, options::OPT_MM)) ||
(PhaseArg = DAL.getLastArg(options::OPT__SLASH_P))) {
FinalPhase = phases::Preprocess;
// --precompile only runs up to precompilation.
} else if ((PhaseArg = DAL.getLastArg(options::OPT__precompile))) {
FinalPhase = phases::Precompile;
// -{fsyntax-only,-analyze,emit-ast} only run up to the compiler.
} else if ((PhaseArg = DAL.getLastArg(options::OPT_fsyntax_only)) ||
(PhaseArg = DAL.getLastArg(options::OPT_module_file_info)) ||
(PhaseArg = DAL.getLastArg(options::OPT_verify_pch)) ||
(PhaseArg = DAL.getLastArg(options::OPT_rewrite_objc)) ||
(PhaseArg = DAL.getLastArg(options::OPT_rewrite_legacy_objc)) ||
(PhaseArg = DAL.getLastArg(options::OPT__migrate)) ||
(PhaseArg = DAL.getLastArg(options::OPT__analyze,
options::OPT__analyze_auto)) ||
(PhaseArg = DAL.getLastArg(options::OPT_emit_ast))) {
FinalPhase = phases::Compile;
// -S only runs up to the backend.
} else if ((PhaseArg = DAL.getLastArg(options::OPT_S))) {
FinalPhase = phases::Backend;
// -c compilation only runs up to the assembler.
} else if ((PhaseArg = DAL.getLastArg(options::OPT_c))) {
FinalPhase = phases::Assemble;
// Otherwise do everything.
} else
FinalPhase = phases::Link;
if (FinalPhaseArg)
*FinalPhaseArg = PhaseArg;
return FinalPhase;
}
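/// Create a synthesized, already-claimed OPT_INPUT argument for \p Value and
/// register it with \p Args.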
static Arg *MakeInputArg(DerivedArgList &Args, OptTable *Opts,
StringRef Value) {
Arg *A = new Arg(Opts->getOption(options::OPT_INPUT), Value,
Args.getBaseArgs().MakeIndex(Value), Value.data());
Args.AddSynthesizedArg(A);
A->claim();
return A;
}
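/// Produce the DerivedArgList for this compilation by applying the default
/// argument translations: rewriting the -Wl,/-Xlinker --no-demangle and
/// -Wp,-MD/-MMD forms, mapping reserved -l names, expanding inputs given after
/// --, and adding implied defaults such as -static for -miamcu and a default
/// -mlinker-version=.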
DerivedArgList *Driver::TranslateInputArgs(const InputArgList &Args) const {
DerivedArgList *DAL = new DerivedArgList(Args);
bool HasNostdlib = Args.hasArg(options::OPT_nostdlib);
bool HasNodefaultlib = Args.hasArg(options::OPT_nodefaultlibs);
for (Arg *A : Args) {
// Unfortunately, we have to parse some forwarding options (-Xassembler,
// -Xlinker, -Xpreprocessor) because we either integrate their functionality
// (assembler and preprocessor), or bypass a previous driver ('collect2').
// Rewrite linker options, to replace --no-demangle with a custom internal
// option.
if ((A->getOption().matches(options::OPT_Wl_COMMA) ||
A->getOption().matches(options::OPT_Xlinker)) &&
A->containsValue("--no-demangle")) {
// Add the rewritten no-demangle argument.
DAL->AddFlagArg(A, Opts->getOption(options::OPT_Z_Xlinker__no_demangle));
// Add the remaining values as Xlinker arguments.
for (StringRef Val : A->getValues())
if (Val != "--no-demangle")
DAL->AddSeparateArg(A, Opts->getOption(options::OPT_Xlinker), Val);
continue;
}
// Rewrite preprocessor options, to replace -Wp,-MD,FOO which is used by
// some build systems. We don't try to be complete here because we don't
// care to encourage this usage model.
if (A->getOption().matches(options::OPT_Wp_COMMA) &&
(A->getValue(0) == StringRef("-MD") ||
A->getValue(0) == StringRef("-MMD"))) {
// Rewrite to -MD/-MMD along with -MF.
if (A->getValue(0) == StringRef("-MD"))
DAL->AddFlagArg(A, Opts->getOption(options::OPT_MD));
else
DAL->AddFlagArg(A, Opts->getOption(options::OPT_MMD));
if (A->getNumValues() == 2)
DAL->AddSeparateArg(A, Opts->getOption(options::OPT_MF),
A->getValue(1));
continue;
}
// Rewrite reserved library names.
if (A->getOption().matches(options::OPT_l)) {
StringRef Value = A->getValue();
// Rewrite unless -nostdlib is present.
if (!HasNostdlib && !HasNodefaultlib && Value == "stdc++") {
DAL->AddFlagArg(A, Opts->getOption(options::OPT_Z_reserved_lib_stdcxx));
continue;
}
// Rewrite unconditionally.
if (Value == "cc_kext") {
DAL->AddFlagArg(A, Opts->getOption(options::OPT_Z_reserved_lib_cckext));
continue;
}
}
// Pick up inputs via the -- option.
if (A->getOption().matches(options::OPT__DASH_DASH)) {
A->claim();
for (StringRef Val : A->getValues())
DAL->append(MakeInputArg(*DAL, Opts, Val));
continue;
}
DAL->append(A);
}
// Enforce -static if -miamcu is present.
if (Args.hasFlag(options::OPT_miamcu, options::OPT_mno_iamcu, false))
DAL->AddFlagArg(0, Opts->getOption(options::OPT_static));
// Add a default value of -mlinker-version=, if one was given and the user
// didn't specify one.
#if defined(HOST_LINK_VERSION)
if (!Args.hasArg(options::OPT_mlinker_version_EQ) &&
strlen(HOST_LINK_VERSION) > 0) {
DAL->AddJoinedArg(0, Opts->getOption(options::OPT_mlinker_version_EQ),
HOST_LINK_VERSION);
DAL->getLastArg(options::OPT_mlinker_version_EQ)->claim();
}
#endif
return DAL;
}
/// \brief Compute target triple from args.
///
/// This routine provides the logic to compute a target triple from various
/// args passed to the driver and the default triple string.
static llvm::Triple computeTargetTriple(const Driver &D,
StringRef DefaultTargetTriple,
const ArgList &Args,
StringRef DarwinArchName = "") {
// FIXME: Already done in Compilation *Driver::BuildCompilation
if (const Arg *A = Args.getLastArg(options::OPT_target))
DefaultTargetTriple = A->getValue();
llvm::Triple Target(llvm::Triple::normalize(DefaultTargetTriple));
// Handle Apple-specific options available here.
if (Target.isOSBinFormatMachO()) {
// If an explicit Darwin arch name is given, that trumps all.
if (!DarwinArchName.empty()) {
tools::darwin::setTripleTypeForMachOArchName(Target, DarwinArchName);
return Target;
}
// Handle the Darwin '-arch' flag.
if (Arg *A = Args.getLastArg(options::OPT_arch)) {
StringRef ArchName = A->getValue();
tools::darwin::setTripleTypeForMachOArchName(Target, ArchName);
}
}
// Handle pseudo-target flags '-mlittle-endian'/'-EL' and
// '-mbig-endian'/'-EB'.
if (Arg *A = Args.getLastArg(options::OPT_mlittle_endian,
options::OPT_mbig_endian)) {
if (A->getOption().matches(options::OPT_mlittle_endian)) {
llvm::Triple LE = Target.getLittleEndianArchVariant();
if (LE.getArch() != llvm::Triple::UnknownArch)
Target = std::move(LE);
} else {
llvm::Triple BE = Target.getBigEndianArchVariant();
if (BE.getArch() != llvm::Triple::UnknownArch)
Target = std::move(BE);
}
}
// Skip further flag support on OSes which don't support '-m32' or '-m64'.
if (Target.getArch() == llvm::Triple::tce ||
Target.getOS() == llvm::Triple::Minix)
return Target;
// Handle pseudo-target flags '-m64', '-mx32', '-m32' and '-m16'.
Arg *A = Args.getLastArg(options::OPT_m64, options::OPT_mx32,
options::OPT_m32, options::OPT_m16);
if (A) {
llvm::Triple::ArchType AT = llvm::Triple::UnknownArch;
if (A->getOption().matches(options::OPT_m64)) {
AT = Target.get64BitArchVariant().getArch();
if (Target.getEnvironment() == llvm::Triple::GNUX32)
Target.setEnvironment(llvm::Triple::GNU);
} else if (A->getOption().matches(options::OPT_mx32) &&
Target.get64BitArchVariant().getArch() == llvm::Triple::x86_64) {
AT = llvm::Triple::x86_64;
Target.setEnvironment(llvm::Triple::GNUX32);
} else if (A->getOption().matches(options::OPT_m32)) {
AT = Target.get32BitArchVariant().getArch();
if (Target.getEnvironment() == llvm::Triple::GNUX32)
Target.setEnvironment(llvm::Triple::GNU);
} else if (A->getOption().matches(options::OPT_m16) &&
Target.get32BitArchVariant().getArch() == llvm::Triple::x86) {
AT = llvm::Triple::x86;
Target.setEnvironment(llvm::Triple::CODE16);
}
if (AT != llvm::Triple::UnknownArch && AT != Target.getArch())
Target.setArch(AT);
}
// Handle -miamcu flag.
if (Args.hasFlag(options::OPT_miamcu, options::OPT_mno_iamcu, false)) {
if (Target.get32BitArchVariant().getArch() != llvm::Triple::x86)
D.Diag(diag::err_drv_unsupported_opt_for_target) << "-miamcu"
<< Target.str();
if (A && !A->getOption().matches(options::OPT_m32))
D.Diag(diag::err_drv_argument_not_allowed_with)
<< "-miamcu" << A->getBaseArg().getAsString(Args);
Target.setArch(llvm::Triple::x86);
Target.setArchName("i586");
Target.setEnvironment(llvm::Triple::UnknownEnvironment);
Target.setEnvironmentName("");
Target.setOS(llvm::Triple::ELFIAMCU);
Target.setVendor(llvm::Triple::UnknownVendor);
Target.setVendorName("intel");
}
return Target;
}
// \brief Parse the LTO options and record the type of LTO compilation
// based on which -f(no-)?lto(=.*)? option occurs last.
void Driver::setLTOMode(const llvm::opt::ArgList &Args) {
LTOMode = LTOK_None;
if (!Args.hasFlag(options::OPT_flto, options::OPT_flto_EQ,
options::OPT_fno_lto, false))
return;
StringRef LTOName("full");
const Arg *A = Args.getLastArg(options::OPT_flto_EQ);
if (A)
LTOName = A->getValue();
LTOMode = llvm::StringSwitch<LTOKind>(LTOName)
.Case("full", LTOK_Full)
.Case("thin", LTOK_Thin)
.Default(LTOK_Unknown);
if (LTOMode == LTOK_Unknown) {
assert(A);
Diag(diag::err_drv_unsupported_option_argument) << A->getOption().getName()
<< A->getValue();
}
}
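/// Instantiate and register the device tool chains required by the offloading
/// inputs in \p Inputs. At the moment only CUDA is handled here: a CUDA device
/// tool chain is added when any input has a CUDA type.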
void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
InputList &Inputs) {
//
// CUDA
//
// We need to generate a CUDA toolchain if any of the inputs has a CUDA type.
if (llvm::any_of(Inputs, [](std::pair<types::ID, const llvm::opt::Arg *> &I) {
return types::isCuda(I.first);
})) {
const ToolChain &TC = getToolChain(
C.getInputArgs(),
llvm::Triple(C.getSingleOffloadToolChain<Action::OFK_Host>()
->getTriple()
.isArch64Bit()
? "nvptx64-nvidia-cuda"
: "nvptx-nvidia-cuda"));
C.addOffloadDeviceToolChain(&TC, Action::OFK_Cuda);
}
//
// TODO: Add support for other offloading programming models here.
//
return;
}
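/// Build a Compilation from \p ArgList: parse the driver mode and arguments,
/// apply driver state taken from the command line (sysroot, resource dir,
/// -save-temps, LTO and bitcode-embedding modes), select the default tool
/// chain for the computed target triple, and construct the inputs, actions,
/// and jobs for the compilation.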
Compilation *Driver::BuildCompilation(ArrayRef<const char *> ArgList) {
llvm::PrettyStackTraceString CrashInfo("Compilation construction");
// FIXME: Handle environment options which affect driver behavior, somewhere
// (client?). GCC_EXEC_PREFIX, LPATH, CC_PRINT_OPTIONS.
if (Optional<std::string> CompilerPathValue =
llvm::sys::Process::GetEnv("COMPILER_PATH")) {
StringRef CompilerPath = *CompilerPathValue;
while (!CompilerPath.empty()) {
std::pair<StringRef, StringRef> Split =
CompilerPath.split(llvm::sys::EnvPathSeparator);
PrefixDirs.push_back(Split.first);
CompilerPath = Split.second;
}
}
// We look for the driver mode option early, because the mode can affect
// how other options are parsed.
ParseDriverMode(ClangExecutable, ArgList.slice(1));
// FIXME: What are we going to do with -V and -b?
// FIXME: This stuff needs to go into the Compilation, not the driver.
bool CCCPrintPhases;
InputArgList Args = ParseArgStrings(ArgList.slice(1));
// Silence driver warnings if requested
Diags.setIgnoreAllWarnings(Args.hasArg(options::OPT_w));
// -no-canonical-prefixes is used very early in main.
Args.ClaimAllArgs(options::OPT_no_canonical_prefixes);
// Ignore -pipe.
Args.ClaimAllArgs(options::OPT_pipe);
// Extract -ccc args.
//
// FIXME: We need to figure out where this behavior should live. Most of it
// should be outside in the client; the parts that aren't should have proper
// options, either by introducing new ones or by overloading gcc ones like -V
// or -b.
CCCPrintPhases = Args.hasArg(options::OPT_ccc_print_phases);
CCCPrintBindings = Args.hasArg(options::OPT_ccc_print_bindings);
if (const Arg *A = Args.getLastArg(options::OPT_ccc_gcc_name))
CCCGenericGCCName = A->getValue();
CCCUsePCH =
Args.hasFlag(options::OPT_ccc_pch_is_pch, options::OPT_ccc_pch_is_pth);
// FIXME: DefaultTargetTriple is used by the target-prefixed calls to as/ld
// and getToolChain is const.
if (IsCLMode()) {
// clang-cl targets MSVC-style Win32.
llvm::Triple T(DefaultTargetTriple);
T.setOS(llvm::Triple::Win32);
T.setVendor(llvm::Triple::PC);
T.setEnvironment(llvm::Triple::MSVC);
DefaultTargetTriple = T.str();
}
if (const Arg *A = Args.getLastArg(options::OPT_target))
DefaultTargetTriple = A->getValue();
if (const Arg *A = Args.getLastArg(options::OPT_ccc_install_dir))
Dir = InstalledDir = A->getValue();
for (const Arg *A : Args.filtered(options::OPT_B)) {
A->claim();
PrefixDirs.push_back(A->getValue(0));
}
if (const Arg *A = Args.getLastArg(options::OPT__sysroot_EQ))
SysRoot = A->getValue();
if (const Arg *A = Args.getLastArg(options::OPT__dyld_prefix_EQ))
DyldPrefix = A->getValue();
if (Args.hasArg(options::OPT_nostdlib))
UseStdLib = false;
if (const Arg *A = Args.getLastArg(options::OPT_resource_dir))
ResourceDir = A->getValue();
if (const Arg *A = Args.getLastArg(options::OPT_save_temps_EQ)) {
SaveTemps = llvm::StringSwitch<SaveTempsMode>(A->getValue())
.Case("cwd", SaveTempsCwd)
.Case("obj", SaveTempsObj)
.Default(SaveTempsCwd);
}
setLTOMode(Args);
// Ignore -fembed-bitcode options with LTO
// since the output will be bitcode anyway.
if (getLTOMode() == LTOK_None) {
if (Arg *A = Args.getLastArg(options::OPT_fembed_bitcode_EQ)) {
StringRef Name = A->getValue();
unsigned Model = llvm::StringSwitch<unsigned>(Name)
.Case("off", EmbedNone)
.Case("all", EmbedBitcode)
.Case("bitcode", EmbedBitcode)
.Case("marker", EmbedMarker)
.Default(~0U);
if (Model == ~0U) {
Diags.Report(diag::err_drv_invalid_value) << A->getAsString(Args)
<< Name;
} else
BitcodeEmbed = static_cast<BitcodeEmbedMode>(Model);
}
} else {
// claim the bitcode option under LTO so no warning is issued.
Args.ClaimAllArgs(options::OPT_fembed_bitcode_EQ);
}
std::unique_ptr<llvm::opt::InputArgList> UArgs =
llvm::make_unique<InputArgList>(std::move(Args));
// Perform the default argument translations.
DerivedArgList *TranslatedArgs = TranslateInputArgs(*UArgs);
// Owned by the host.
const ToolChain &TC = getToolChain(
*UArgs, computeTargetTriple(*this, DefaultTargetTriple, *UArgs));
// The compilation takes ownership of Args.
Compilation *C = new Compilation(*this, TC, UArgs.release(), TranslatedArgs);
if (!HandleImmediateArgs(*C))
return C;
// Construct the list of inputs.
InputList Inputs;
BuildInputs(C->getDefaultToolChain(), *TranslatedArgs, Inputs);
// Populate the tool chains for the offloading devices, if any.
CreateOffloadingDeviceToolChains(*C, Inputs);
// Construct the list of abstract actions to perform for this compilation. On
// MachO targets this uses the driver-driver and universal actions.
if (TC.getTriple().isOSBinFormatMachO())
BuildUniversalActions(*C, C->getDefaultToolChain(), Inputs);
else
BuildActions(*C, C->getArgs(), Inputs, C->getActions());
if (CCCPrintPhases) {
PrintActions(*C);
return C;
}
BuildJobs(*C);
return C;
}
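/// Render each argument in \p Args and print them as a single, shell-quoted,
/// space-separated line to \p OS.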
static void printArgList(raw_ostream &OS, const llvm::opt::ArgList &Args) {
llvm::opt::ArgStringList ASL;
for (const auto *A : Args)
A->render(Args, ASL);
for (auto I = ASL.begin(), E = ASL.end(); I != E; ++I) {
if (I != ASL.begin())
OS << ' ';
Command::printArg(OS, *I, true);
}
OS << '\n';
}
// When clang crashes, produce diagnostic information including the fully
// preprocessed source file(s). Request that the developer attach the
// diagnostic information to a bug report.
void Driver::generateCompilationDiagnostics(Compilation &C,
const Command &FailingCommand) {
if (C.getArgs().hasArg(options::OPT_fno_crash_diagnostics))
return;
// Don't try to generate diagnostics for link or dsymutil jobs.
if (FailingCommand.getCreator().isLinkJob() ||
FailingCommand.getCreator().isDsymutilJob())
return;
// Print the version of the compiler.
PrintVersion(C, llvm::errs());
Diag(clang::diag::note_drv_command_failed_diag_msg)
<< "PLEASE submit a bug report to " BUG_REPORT_URL " and include the "
"crash backtrace, preprocessed source, and associated run script.";
// Suppress driver output and emit preprocessor output to temp file.
Mode = CPPMode;
CCGenDiagnostics = true;
// Save the original job command(s).
Command Cmd = FailingCommand;
// Keep track of whether we produce any errors while trying to produce
// preprocessed sources.
DiagnosticErrorTrap Trap(Diags);
// Suppress tool output.
C.initCompilationForDiagnostics();
// Construct the list of inputs.
InputList Inputs;
BuildInputs(C.getDefaultToolChain(), C.getArgs(), Inputs);
for (InputList::iterator it = Inputs.begin(), ie = Inputs.end(); it != ie;) {
bool IgnoreInput = false;
// Ignore input from stdin or any inputs that cannot be preprocessed.
// Check type first as not all linker inputs have a value.
if (types::getPreprocessedType(it->first) == types::TY_INVALID) {
IgnoreInput = true;
} else if (!strcmp(it->second->getValue(), "-")) {
Diag(clang::diag::note_drv_command_failed_diag_msg)
<< "Error generating preprocessed source(s) - "
"ignoring input from stdin.";
IgnoreInput = true;
}
if (IgnoreInput) {
it = Inputs.erase(it);
ie = Inputs.end();
} else {
++it;
}
}
if (Inputs.empty()) {
Diag(clang::diag::note_drv_command_failed_diag_msg)
<< "Error generating preprocessed source(s) - "
"no preprocessable inputs.";
return;
}
// Don't attempt to generate preprocessed files if multiple -arch options are
// used, unless they're all duplicates.
llvm::StringSet<> ArchNames;
for (const Arg *A : C.getArgs()) {
if (A->getOption().matches(options::OPT_arch)) {
StringRef ArchName = A->getValue();
ArchNames.insert(ArchName);
}
}
if (ArchNames.size() > 1) {
Diag(clang::diag::note_drv_command_failed_diag_msg)
<< "Error generating preprocessed source(s) - cannot generate "
"preprocessed source with multiple -arch options.";
return;
}
// Construct the list of abstract actions to perform for this compilation. On
// Darwin OSes this uses the driver-driver and builds universal actions.
const ToolChain &TC = C.getDefaultToolChain();
if (TC.getTriple().isOSBinFormatMachO())
BuildUniversalActions(C, TC, Inputs);
else
BuildActions(C, C.getArgs(), Inputs, C.getActions());
BuildJobs(C);
// If there were errors building the compilation, quit now.
if (Trap.hasErrorOccurred()) {
Diag(clang::diag::note_drv_command_failed_diag_msg)
<< "Error generating preprocessed source(s).";
return;
}
// Generate preprocessed output.
SmallVector<std::pair<int, const Command *>, 4> FailingCommands;
C.ExecuteJobs(C.getJobs(), FailingCommands);
// If any of the preprocessing commands failed, clean up and exit.
if (!FailingCommands.empty()) {
if (!isSaveTempsEnabled())
C.CleanupFileList(C.getTempFiles(), true);
Diag(clang::diag::note_drv_command_failed_diag_msg)
<< "Error generating preprocessed source(s).";
return;
}
const ArgStringList &TempFiles = C.getTempFiles();
if (TempFiles.empty()) {
Diag(clang::diag::note_drv_command_failed_diag_msg)
<< "Error generating preprocessed source(s).";
return;
}
Diag(clang::diag::note_drv_command_failed_diag_msg)
<< "\n********************\n\n"
"PLEASE ATTACH THE FOLLOWING FILES TO THE BUG REPORT:\n"
"Preprocessed source(s) and associated run script(s) are located at:";
SmallString<128> VFS;
for (const char *TempFile : TempFiles) {
Diag(clang::diag::note_drv_command_failed_diag_msg) << TempFile;
if (StringRef(TempFile).endswith(".cache")) {
// In some cases (modules) we'll dump extra data to help with reproducing
// the crash into a directory next to the output.
VFS = llvm::sys::path::filename(TempFile);
llvm::sys::path::append(VFS, "vfs", "vfs.yaml");
}
}
// Assume associated files are based off of the first temporary file.
CrashReportInfo CrashInfo(TempFiles[0], VFS);
std::string Script = CrashInfo.Filename.rsplit('.').first.str() + ".sh";
std::error_code EC;
llvm::raw_fd_ostream ScriptOS(Script, EC, llvm::sys::fs::F_Excl);
if (EC) {
Diag(clang::diag::note_drv_command_failed_diag_msg)
<< "Error generating run script: " + Script + " " + EC.message();
} else {
ScriptOS << "# Crash reproducer for " << getClangFullVersion() << "\n"
<< "# Driver args: ";
printArgList(ScriptOS, C.getInputArgs());
ScriptOS << "# Original command: ";
Cmd.Print(ScriptOS, "\n", /*Quote=*/true);
Cmd.Print(ScriptOS, "\n", /*Quote=*/true, &CrashInfo);
Diag(clang::diag::note_drv_command_failed_diag_msg) << Script;
}
for (const auto &A : C.getArgs().filtered(options::OPT_frewrite_map_file,
options::OPT_frewrite_map_file_EQ))
Diag(clang::diag::note_drv_command_failed_diag_msg) << A->getValue();
Diag(clang::diag::note_drv_command_failed_diag_msg)
<< "\n\n********************";
}
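/// Arrange for \p Cmd to use a response file if its tool supports them and the
/// command line would not otherwise fit within the system limits.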
void Driver::setUpResponseFiles(Compilation &C, Command &Cmd) {
// Since commandLineFitsWithinSystemLimits() may underestimate system's capacity
// if the tool does not support response files, there is a chance that things
// will just work without a response file, so we silently just skip it.
if (Cmd.getCreator().getResponseFilesSupport() == Tool::RF_None ||
llvm::sys::commandLineFitsWithinSystemLimits(Cmd.getExecutable(), Cmd.getArguments()))
return;
std::string TmpName = GetTemporaryPath("response", "txt");
Cmd.setResponseFile(
C.addTempFile(C.getArgs().MakeArgString(TmpName.c_str())));
}
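/// Execute the jobs in \p C (or just print them under -###), clean up
/// temporary and stale result files, and report abnormal failures. Failing
/// commands are returned through \p FailingCommands; the return value is 1 if
/// the compilation could not be built and 0 otherwise.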
int Driver::ExecuteCompilation(
Compilation &C,
SmallVectorImpl<std::pair<int, const Command *>> &FailingCommands) {
// Just print if -### was present.
if (C.getArgs().hasArg(options::OPT__HASH_HASH_HASH)) {
C.getJobs().Print(llvm::errs(), "\n", true);
return 0;
}
// If there were errors building the compilation, quit now.
if (Diags.hasErrorOccurred())
return 1;
// Set up response file names for each command, if necessary
for (auto &Job : C.getJobs())
setUpResponseFiles(C, Job);
C.ExecuteJobs(C.getJobs(), FailingCommands);
// Remove temp files.
C.CleanupFileList(C.getTempFiles());
// If the command succeeded, we are done.
if (FailingCommands.empty())
return 0;
// Otherwise, remove result files and print extra information about abnormal
// failures.
for (const auto &CmdPair : FailingCommands) {
int Res = CmdPair.first;
const Command *FailingCommand = CmdPair.second;
// Remove result files if we're not saving temps.
if (!isSaveTempsEnabled()) {
const JobAction *JA = cast<JobAction>(&FailingCommand->getSource());
C.CleanupFileMap(C.getResultFiles(), JA, true);
// Failure result files are valid unless we crashed.
if (Res < 0)
C.CleanupFileMap(C.getFailureResultFiles(), JA, true);
}
// Print extra information about abnormal failures, if possible.
//
// This is ad-hoc, but we don't want to be excessively noisy. If the result
// status was 1, assume the command failed normally. In particular, if it
// was the compiler then assume it gave a reasonable error code. Failures
// in other tools are less common, and they generally have worse
// diagnostics, so always print the diagnostic there.
const Tool &FailingTool = FailingCommand->getCreator();
if (!FailingCommand->getCreator().hasGoodDiagnostics() || Res != 1) {
// FIXME: See FIXME above regarding result code interpretation.
if (Res < 0)
Diag(clang::diag::err_drv_command_signalled)
<< FailingTool.getShortName();
else
Diag(clang::diag::err_drv_command_failed) << FailingTool.getShortName()
<< Res;
}
}
return 0;
}
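/// Print the driver's help text, including hidden options when \p ShowHidden
/// is set.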
void Driver::PrintHelp(bool ShowHidden) const {
unsigned IncludedFlagsBitmask;
unsigned ExcludedFlagsBitmask;
std::tie(IncludedFlagsBitmask, ExcludedFlagsBitmask) =
getIncludeExcludeOptionFlagMasks();
ExcludedFlagsBitmask |= options::NoDriverOption;
if (!ShowHidden)
ExcludedFlagsBitmask |= HelpHidden;
getOpts().PrintHelp(llvm::outs(), Name.c_str(), DriverTitle.c_str(),
IncludedFlagsBitmask, ExcludedFlagsBitmask);
}
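/// Print the full version string, target triple, thread model, and installed
/// directory for the compilation \p C to \p OS.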
void Driver::PrintVersion(const Compilation &C, raw_ostream &OS) const {
// FIXME: The following handlers should use a callback mechanism, we don't
// know what the client would like to do.
OS << getClangFullVersion() << '\n';
const ToolChain &TC = C.getDefaultToolChain();
OS << "Target: " << TC.getTripleString() << '\n';
// Print the threading model.
if (Arg *A = C.getArgs().getLastArg(options::OPT_mthread_model)) {
// Don't print if the ToolChain would have barfed on it already
if (TC.isThreadModelSupported(A->getValue()))
OS << "Thread model: " << A->getValue();
} else
OS << "Thread model: " << TC.getThreadModel();
OS << '\n';
// Print out the install directory.
OS << "InstalledDir: " << InstalledDir << '\n';
}
/// PrintDiagnosticCategories - Implement the --print-diagnostic-categories
/// option.
static void PrintDiagnosticCategories(raw_ostream &OS) {
// Skip the empty category.
for (unsigned i = 1, max = DiagnosticIDs::getNumberOfCategories(); i != max;
++i)
OS << i << ',' << DiagnosticIDs::getCategoryNameFromID(i) << '\n';
}
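/// Handle arguments that are answered immediately, such as -dumpmachine,
/// --help, --version, and the -print-* queries. Returns false when one of
/// them was handled and no further compilation work is needed.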
bool Driver::HandleImmediateArgs(const Compilation &C) {
// The order these options are handled in gcc is all over the place, but we
// don't expect inconsistencies w.r.t. that to matter in practice.
if (C.getArgs().hasArg(options::OPT_dumpmachine)) {
llvm::outs() << C.getDefaultToolChain().getTripleString() << '\n';
return false;
}
if (C.getArgs().hasArg(options::OPT_dumpversion)) {
// Since -dumpversion is only implemented for pedantic GCC compatibility, we
// return an answer which matches our definition of __VERSION__.
//
// If we want to return a more correct answer some day, then we should
// introduce a non-pedantically GCC compatible mode to Clang in which we
// provide sensible definitions for -dumpversion, __VERSION__, etc.
llvm::outs() << "4.2.1\n";
return false;
}
if (C.getArgs().hasArg(options::OPT__print_diagnostic_categories)) {
PrintDiagnosticCategories(llvm::outs());
return false;
}
if (C.getArgs().hasArg(options::OPT_help) ||
C.getArgs().hasArg(options::OPT__help_hidden)) {
PrintHelp(C.getArgs().hasArg(options::OPT__help_hidden));
return false;
}
if (C.getArgs().hasArg(options::OPT__version)) {
// Follow gcc behavior and use stdout for --version and stderr for -v.
PrintVersion(C, llvm::outs());
return false;
}
if (C.getArgs().hasArg(options::OPT_v) ||
C.getArgs().hasArg(options::OPT__HASH_HASH_HASH)) {
PrintVersion(C, llvm::errs());
SuppressMissingInputWarning = true;
}
const ToolChain &TC = C.getDefaultToolChain();
if (C.getArgs().hasArg(options::OPT_v))
TC.printVerboseInfo(llvm::errs());
if (C.getArgs().hasArg(options::OPT_print_search_dirs)) {
llvm::outs() << "programs: =";
bool separator = false;
for (const std::string &Path : TC.getProgramPaths()) {
if (separator)
llvm::outs() << ':';
llvm::outs() << Path;
separator = true;
}
llvm::outs() << "\n";
llvm::outs() << "libraries: =" << ResourceDir;
StringRef sysroot = C.getSysRoot();
for (const std::string &Path : TC.getFilePaths()) {
// Always print a separator. ResourceDir was the first item shown.
llvm::outs() << ':';
// Interpretation of leading '=' is needed only for NetBSD.
if (Path[0] == '=')
llvm::outs() << sysroot << Path.substr(1);
else
llvm::outs() << Path;
}
llvm::outs() << "\n";
return false;
}
// FIXME: The following handlers should use a callback mechanism, we don't
// know what the client would like to do.
if (Arg *A = C.getArgs().getLastArg(options::OPT_print_file_name_EQ)) {
llvm::outs() << GetFilePath(A->getValue(), TC) << "\n";
return false;
}
if (Arg *A = C.getArgs().getLastArg(options::OPT_print_prog_name_EQ)) {
llvm::outs() << GetProgramPath(A->getValue(), TC) << "\n";
return false;
}
if (C.getArgs().hasArg(options::OPT_print_libgcc_file_name)) {
llvm::outs() << GetFilePath("libgcc.a", TC) << "\n";
return false;
}
if (C.getArgs().hasArg(options::OPT_print_multi_lib)) {
for (const Multilib &Multilib : TC.getMultilibs())
llvm::outs() << Multilib << "\n";
return false;
}
if (C.getArgs().hasArg(options::OPT_print_multi_directory)) {
for (const Multilib &Multilib : TC.getMultilibs()) {
if (Multilib.gccSuffix().empty())
llvm::outs() << ".\n";
else {
StringRef Suffix(Multilib.gccSuffix());
assert(Suffix.front() == '/');
llvm::outs() << Suffix.substr(1) << "\n";
}
}
return false;
}
return true;
}
// Display an action graph human-readably. Action A is the "sink" node
// and latest-occurring action. Traversal is in pre-order, visiting the
// inputs to each action before printing the action itself.
static unsigned PrintActions1(const Compilation &C, Action *A,
std::map<Action *, unsigned> &Ids) {
if (Ids.count(A)) // A was already visited.
return Ids[A];
std::string str;
llvm::raw_string_ostream os(str);
os << Action::getClassName(A->getKind()) << ", ";
if (InputAction *IA = dyn_cast<InputAction>(A)) {
os << "\"" << IA->getInputArg().getValue() << "\"";
} else if (BindArchAction *BIA = dyn_cast<BindArchAction>(A)) {
os << '"' << BIA->getArchName() << '"' << ", {"
<< PrintActions1(C, *BIA->input_begin(), Ids) << "}";
} else if (OffloadAction *OA = dyn_cast<OffloadAction>(A)) {
bool IsFirst = true;
OA->doOnEachDependence(
[&](Action *A, const ToolChain *TC, const char *BoundArch) {
// E.g. for two CUDA device dependences whose bound arch is sm_20 and
// sm_35 this will generate:
// "cuda-device" (nvptx64-nvidia-cuda:sm_20) {#ID}, "cuda-device"
// (nvptx64-nvidia-cuda:sm_35) {#ID}
if (!IsFirst)
os << ", ";
          os << '"';
          if (TC) {
            os << A->getOffloadingKindPrefix();
            os << " (" << TC->getTriple().normalize();
            if (BoundArch)
              os << ":" << BoundArch;
            os << ")";
          } else {
            os << "host";
          }
          os << '"';
os << " {" << PrintActions1(C, A, Ids) << "}";
IsFirst = false;
});
} else {
const ActionList *AL = &A->getInputs();
if (AL->size()) {
const char *Prefix = "{";
for (Action *PreRequisite : *AL) {
os << Prefix << PrintActions1(C, PreRequisite, Ids);
Prefix = ", ";
}
os << "}";
} else
os << "{}";
}
  // Append offload info for all actions other than the offloading action
  // itself (e.g. (cuda-device, sm_20) or (cuda-host)).
std::string offload_str;
llvm::raw_string_ostream offload_os(offload_str);
if (!isa<OffloadAction>(A)) {
auto S = A->getOffloadingKindPrefix();
if (!S.empty()) {
offload_os << ", (" << S;
if (A->getOffloadingArch())
offload_os << ", " << A->getOffloadingArch();
offload_os << ")";
}
}
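  // All inputs (or offload dependences) of A have been printed, so A gets the
  // next sequential id.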
unsigned Id = Ids.size();
Ids[A] = Id;
llvm::errs() << Id << ": " << os.str() << ", "
<< types::getTypeName(A->getType()) << offload_os.str() << "\n";
return Id;
}
// Print the action graphs in a compilation C.
// For example "clang -c file1.c file2.c" is composed of two subgraphs.
void Driver::PrintActions(const Compilation &C) const {
std::map<Action *, unsigned> Ids;
for (Action *A : C.getActions())
PrintActions1(C, A, Ids);
}
/// \brief Check whether the given input tree contains any compilation or
/// assembly actions.
static bool ContainsCompileOrAssembleAction(const Action *A) {
if (isa<CompileJobAction>(A) || isa<BackendJobAction>(A) ||
isa<AssembleJobAction>(A))
return true;
for (const Action *Input : A->inputs())
if (ContainsCompileOrAssembleAction(Input))
return true;
return false;
}
void Driver::BuildUniversalActions(Compilation &C, const ToolChain &TC,
const InputList &BAInputs) const {
DerivedArgList &Args = C.getArgs();
ActionList &Actions = C.getActions();
llvm::PrettyStackTraceString CrashInfo("Building universal build actions");
// Collect the list of architectures. Duplicates are allowed, but should only
// be handled once (in the order seen).
llvm::StringSet<> ArchNames;
SmallVector<const char *, 4> Archs;
for (Arg *A : Args) {
if (A->getOption().matches(options::OPT_arch)) {
// Validate the option here; we don't save the type here because its
// particular spelling may participate in other driver choices.
llvm::Triple::ArchType Arch =
tools::darwin::getArchTypeForMachOArchName(A->getValue());
if (Arch == llvm::Triple::UnknownArch) {
Diag(clang::diag::err_drv_invalid_arch_name) << A->getAsString(Args);
continue;
}
A->claim();
if (ArchNames.insert(A->getValue()).second)
Archs.push_back(A->getValue());
}
}
// When there is no explicit arch for this platform, make sure we still bind
// the architecture (to the default) so that -Xarch_ is handled correctly.
if (!Archs.size())
Archs.push_back(Args.MakeArgString(TC.getDefaultUniversalArchName()));
ActionList SingleActions;
BuildActions(C, Args, BAInputs, SingleActions);
// Add in arch bindings for every top level action, as well as lipo and
// dsymutil steps if needed.
for (Action* Act : SingleActions) {
// Make sure we can lipo this kind of output. If not (and it is an actual
// output) then we disallow, since we can't create an output file with the
// right name without overwriting it. We could remove this oddity by just
// changing the output names to include the arch, which would also fix
// -save-temps. Compatibility wins for now.
if (Archs.size() > 1 && !types::canLipoType(Act->getType()))
Diag(clang::diag::err_drv_invalid_output_with_multiple_archs)
<< types::getTypeName(Act->getType());
ActionList Inputs;
for (unsigned i = 0, e = Archs.size(); i != e; ++i)
Inputs.push_back(C.MakeAction<BindArchAction>(Act, Archs[i]));
// Lipo if necessary, we do it this way because we need to set the arch flag
// so that -Xarch_ gets overwritten.
if (Inputs.size() == 1 || Act->getType() == types::TY_Nothing)
Actions.append(Inputs.begin(), Inputs.end());
else
Actions.push_back(C.MakeAction<LipoJobAction>(Inputs, Act->getType()));
// Handle debug info queries.
Arg *A = Args.getLastArg(options::OPT_g_Group);
if (A && !A->getOption().matches(options::OPT_g0) &&
!A->getOption().matches(options::OPT_gstabs) &&
ContainsCompileOrAssembleAction(Actions.back())) {
// Add a 'dsymutil' step if necessary, when debug info is enabled and we
// have a compile input. We need to run 'dsymutil' ourselves in such cases
// because the debug info will refer to a temporary object file which
// will be removed at the end of the compilation process.
if (Act->getType() == types::TY_Image) {
ActionList Inputs;
Inputs.push_back(Actions.back());
Actions.pop_back();
Actions.push_back(
C.MakeAction<DsymutilJobAction>(Inputs, types::TY_dSYM));
}
// Verify the debug info output.
if (Args.hasArg(options::OPT_verify_debug_info)) {
Action* LastAction = Actions.back();
Actions.pop_back();
Actions.push_back(C.MakeAction<VerifyDebugInfoJobAction>(
LastAction, types::TY_Nothing));
}
}
}
}
/// \brief Check that the file referenced by Value exists. If it doesn't,
/// issue a diagnostic and return false.
static bool DiagnoseInputExistence(const Driver &D, const DerivedArgList &Args,
StringRef Value, types::ID Ty) {
if (!D.getCheckInputsExist())
return true;
// stdin always exists.
if (Value == "-")
return true;
SmallString<64> Path(Value);
if (Arg *WorkDir = Args.getLastArg(options::OPT_working_directory)) {
if (!llvm::sys::path::is_absolute(Path)) {
SmallString<64> Directory(WorkDir->getValue());
llvm::sys::path::append(Directory, Value);
Path.assign(Directory);
}
}
if (llvm::sys::fs::exists(Twine(Path)))
return true;
if (D.IsCLMode()) {
if (!llvm::sys::path::is_absolute(Twine(Path)) &&
llvm::sys::Process::FindInEnvPath("LIB", Value))
return true;
if (Args.hasArg(options::OPT__SLASH_link) && Ty == types::TY_Object) {
// Arguments to the /link flag might cause the linker to search for object
// and library files in paths we don't know about. Don't error in such
// cases.
return true;
}
}
D.Diag(clang::diag::err_drv_no_such_file) << Path;
return false;
}
// Construct the list of inputs and their types.
void Driver::BuildInputs(const ToolChain &TC, DerivedArgList &Args,
InputList &Inputs) const {
// Track the current user specified (-x) input. We also explicitly track the
// argument used to set the type; we only want to claim the type when we
// actually use it, so we warn about unused -x arguments.
types::ID InputType = types::TY_Nothing;
Arg *InputTypeArg = nullptr;
// The last /TC or /TP option sets the input type to C or C++ globally.
if (Arg *TCTP = Args.getLastArgNoClaim(options::OPT__SLASH_TC,
options::OPT__SLASH_TP)) {
InputTypeArg = TCTP;
InputType = TCTP->getOption().matches(options::OPT__SLASH_TC)
? types::TY_C
: types::TY_CXX;
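    // If several /TC or /TP options were given, warn about each override; the
    // last one wins.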
arg_iterator it =
Args.filtered_begin(options::OPT__SLASH_TC, options::OPT__SLASH_TP);
const arg_iterator ie = Args.filtered_end();
Arg *Previous = *it++;
bool ShowNote = false;
while (it != ie) {
Diag(clang::diag::warn_drv_overriding_flag_option)
<< Previous->getSpelling() << (*it)->getSpelling();
Previous = *it++;
ShowNote = true;
}
if (ShowNote)
Diag(clang::diag::note_drv_t_option_is_global);
// No driver mode exposes -x and /TC or /TP; we don't support mixing them.
assert(!Args.hasArg(options::OPT_x) && "-x and /TC or /TP is not allowed");
}
for (Arg *A : Args) {
if (A->getOption().getKind() == Option::InputClass) {
const char *Value = A->getValue();
types::ID Ty = types::TY_INVALID;
// Infer the input type if necessary.
if (InputType == types::TY_Nothing) {
// If there was an explicit arg for this, claim it.
if (InputTypeArg)
InputTypeArg->claim();
// stdin must be handled specially.
if (memcmp(Value, "-", 2) == 0) {
// If running with -E, treat as a C input (this changes the builtin
// macros, for example). This may be overridden by -ObjC below.
//
// Otherwise emit an error but still use a valid type to avoid
// spurious errors (e.g., no inputs).
if (!Args.hasArgNoClaim(options::OPT_E) && !CCCIsCPP())
Diag(IsCLMode() ? clang::diag::err_drv_unknown_stdin_type_clang_cl
: clang::diag::err_drv_unknown_stdin_type);
Ty = types::TY_C;
} else {
// Otherwise lookup by extension.
// Fallback is C if invoked as C preprocessor or Object otherwise.
// We use a host hook here because Darwin at least has its own
// idea of what .s is.
if (const char *Ext = strrchr(Value, '.'))
Ty = TC.LookupTypeForExtension(Ext + 1);
if (Ty == types::TY_INVALID) {
if (CCCIsCPP())
Ty = types::TY_C;
else
Ty = types::TY_Object;
}
// If the driver is invoked as C++ compiler (like clang++ or c++) it
// should autodetect some input files as C++ for g++ compatibility.
if (CCCIsCXX()) {
types::ID OldTy = Ty;
Ty = types::lookupCXXTypeForCType(Ty);
if (Ty != OldTy)
Diag(clang::diag::warn_drv_treating_input_as_cxx)
<< getTypeName(OldTy) << getTypeName(Ty);
}
}
// -ObjC and -ObjC++ override the default language, but only for "source
// files". We just treat everything that isn't a linker input as a
// source file.
//
// FIXME: Clean this up if we move the phase sequence into the type.
if (Ty != types::TY_Object) {
if (Args.hasArg(options::OPT_ObjC))
Ty = types::TY_ObjC;
else if (Args.hasArg(options::OPT_ObjCXX))
Ty = types::TY_ObjCXX;
}
} else {
assert(InputTypeArg && "InputType set w/o InputTypeArg");
if (!InputTypeArg->getOption().matches(options::OPT_x)) {
// If emulating cl.exe, make sure that /TC and /TP don't affect input
// object files.
const char *Ext = strrchr(Value, '.');
if (Ext && TC.LookupTypeForExtension(Ext + 1) == types::TY_Object)
Ty = types::TY_Object;
}
if (Ty == types::TY_INVALID) {
Ty = InputType;
InputTypeArg->claim();
}
}
if (DiagnoseInputExistence(*this, Args, Value, Ty))
Inputs.push_back(std::make_pair(Ty, A));
} else if (A->getOption().matches(options::OPT__SLASH_Tc)) {
StringRef Value = A->getValue();
if (DiagnoseInputExistence(*this, Args, Value, types::TY_C)) {
Arg *InputArg = MakeInputArg(Args, Opts, A->getValue());
Inputs.push_back(std::make_pair(types::TY_C, InputArg));
}
A->claim();
} else if (A->getOption().matches(options::OPT__SLASH_Tp)) {
StringRef Value = A->getValue();
if (DiagnoseInputExistence(*this, Args, Value, types::TY_CXX)) {
Arg *InputArg = MakeInputArg(Args, Opts, A->getValue());
Inputs.push_back(std::make_pair(types::TY_CXX, InputArg));
}
A->claim();
} else if (A->getOption().hasFlag(options::LinkerInput)) {
// Just treat as object type, we could make a special type for this if
// necessary.
Inputs.push_back(std::make_pair(types::TY_Object, A));
} else if (A->getOption().matches(options::OPT_x)) {
InputTypeArg = A;
InputType = types::lookupTypeForTypeSpecifier(A->getValue());
A->claim();
// Follow gcc behavior and treat as linker input for invalid -x
      // options. It's not clear why we shouldn't just revert to unknown; but
// this isn't very important, we might as well be bug compatible.
if (!InputType) {
Diag(clang::diag::err_drv_unknown_language) << A->getValue();
InputType = types::TY_Object;
}
}
}
if (CCCIsCPP() && Inputs.empty()) {
// If called as standalone preprocessor, stdin is processed
// if no other input is present.
Arg *A = MakeInputArg(Args, Opts, "-");
Inputs.push_back(std::make_pair(types::TY_C, A));
}
}
namespace {
/// Provides a convenient interface for different programming models to generate
/// the required device actions.
class OffloadingActionBuilder final {
/// Flag used to trace errors in the builder.
bool IsValid = false;
/// The compilation that is using this builder.
Compilation &C;
/// The derived arguments associated with this builder.
DerivedArgList &Args;
/// Map between an input argument and the offload kinds used to process it.
std::map<const Arg *, unsigned> InputArgToOffloadKindMap;
/// Builder interface. It doesn't build anything or keep any state.
class DeviceActionBuilder {
public:
typedef llvm::SmallVector<phases::ID, phases::MaxNumberOfPhases> PhasesTy;
enum ActionBuilderReturnCode {
// The builder acted successfully on the current action.
ABRT_Success,
// The builder didn't have to act on the current action.
ABRT_Inactive,
// The builder was successful and requested the host action to not be
// generated.
ABRT_Ignore_Host,
};
protected:
/// Compilation associated with this builder.
Compilation &C;
/// Tool chains associated with this builder. The same programming
/// model may have associated one or more tool chains.
SmallVector<const ToolChain *, 2> ToolChains;
/// The derived arguments associated with this builder.
DerivedArgList &Args;
/// The inputs associated with this builder.
const Driver::InputList &Inputs;
/// The associated offload kind.
Action::OffloadKind AssociatedOffloadKind = Action::OFK_None;
public:
DeviceActionBuilder(Compilation &C, DerivedArgList &Args,
const Driver::InputList &Inputs,
Action::OffloadKind AssociatedOffloadKind)
: C(C), Args(Args), Inputs(Inputs),
AssociatedOffloadKind(AssociatedOffloadKind) {}
virtual ~DeviceActionBuilder() {}
/// Fill up the array \a DA with all the device dependences that should be
/// added to the provided host action \a HostAction. By default it is
/// inactive.
virtual ActionBuilderReturnCode
getDeviceDepences(OffloadAction::DeviceDependences &DA, phases::ID CurPhase,
phases::ID FinalPhase, PhasesTy &Phases) {
return ABRT_Inactive;
}
/// Update the state to include the provided host action \a HostAction as a
/// dependency of the current device action. By default it is inactive.
virtual ActionBuilderReturnCode addDeviceDepences(Action *HostAction) {
return ABRT_Inactive;
}
    /// Append top level actions generated by the builder.
virtual void appendTopLevelActions(ActionList &AL) {}
    /// Append linker actions generated by the builder.
virtual void appendLinkDependences(OffloadAction::DeviceDependences &DA) {}
/// Initialize the builder. Return true if any initialization errors are
/// found.
virtual bool initialize() { return false; }
/// Return true if this builder is valid. We have a valid builder if we have
/// associated device tool chains.
bool isValid() { return !ToolChains.empty(); }
/// Return the associated offload kind.
Action::OffloadKind getAssociatedOffloadKind() {
return AssociatedOffloadKind;
}
};
/// \brief CUDA action builder. It injects device code in the host backend
/// action.
class CudaActionBuilder final : public DeviceActionBuilder {
/// Flags to signal if the user requested host-only or device-only
/// compilation.
bool CompileHostOnly = false;
bool CompileDeviceOnly = false;
/// List of GPU architectures to use in this compilation.
SmallVector<CudaArch, 4> GpuArchList;
/// The CUDA actions for the current input.
ActionList CudaDeviceActions;
/// The CUDA fat binary if it was generated for the current input.
Action *CudaFatBinary = nullptr;
/// Flag that is set to true if this builder acted on the current input.
bool IsActive = false;
public:
CudaActionBuilder(Compilation &C, DerivedArgList &Args,
const Driver::InputList &Inputs)
: DeviceActionBuilder(C, Args, Inputs, Action::OFK_Cuda) {}
ActionBuilderReturnCode
getDeviceDepences(OffloadAction::DeviceDependences &DA, phases::ID CurPhase,
phases::ID FinalPhase, PhasesTy &Phases) override {
if (!IsActive)
return ABRT_Inactive;
// If we don't have more CUDA actions, we don't have any dependences to
// create for the host.
if (CudaDeviceActions.empty())
return ABRT_Success;
assert(CudaDeviceActions.size() == GpuArchList.size() &&
"Expecting one action per GPU architecture.");
assert(!CompileHostOnly &&
"Not expecting CUDA actions in host-only compilation.");
// If we are generating code for the device or we are in a backend phase,
// we attempt to generate the fat binary. We compile each arch to ptx and
// assemble to cubin, then feed the cubin *and* the ptx into a device
// "link" action, which uses fatbinary to combine these cubins into one
// fatbin. The fatbin is then an input to the host action if not in
// device-only mode.
if (CompileDeviceOnly || CurPhase == phases::Backend) {
ActionList DeviceActions;
for (unsigned I = 0, E = GpuArchList.size(); I != E; ++I) {
// Produce the device action from the current phase up to the assemble
// phase.
for (auto Ph : Phases) {
// Skip the phases that were already dealt with.
if (Ph < CurPhase)
continue;
// We have to be consistent with the host final phase.
if (Ph > FinalPhase)
break;
CudaDeviceActions[I] = C.getDriver().ConstructPhaseAction(
C, Args, Ph, CudaDeviceActions[I]);
if (Ph == phases::Assemble)
break;
}
// If we didn't reach the assemble phase, we can't generate the fat
// binary. We don't need to generate the fat binary if we are not in
// device-only mode.
if (!isa<AssembleJobAction>(CudaDeviceActions[I]) ||
CompileDeviceOnly)
continue;
Action *AssembleAction = CudaDeviceActions[I];
assert(AssembleAction->getType() == types::TY_Object);
assert(AssembleAction->getInputs().size() == 1);
Action *BackendAction = AssembleAction->getInputs()[0];
assert(BackendAction->getType() == types::TY_PP_Asm);
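          // Create offload actions for both the object (cubin) and the
          // assembler output (ptx) so that both become inputs of the device
          // link step below.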
for (auto &A : {AssembleAction, BackendAction}) {
OffloadAction::DeviceDependences DDep;
DDep.add(*A, *ToolChains.front(), CudaArchToString(GpuArchList[I]),
Action::OFK_Cuda);
DeviceActions.push_back(
C.MakeAction<OffloadAction>(DDep, A->getType()));
}
}
// We generate the fat binary if we have device input actions.
if (!DeviceActions.empty()) {
CudaFatBinary =
C.MakeAction<LinkJobAction>(DeviceActions, types::TY_CUDA_FATBIN);
if (!CompileDeviceOnly) {
DA.add(*CudaFatBinary, *ToolChains.front(), /*BoundArch=*/nullptr,
Action::OFK_Cuda);
            // Clear the fat binary; it is already a dependence of a host
            // action.
CudaFatBinary = nullptr;
}
          // Remove the CUDA actions as they are already connected to a host
          // action or fat binary.
CudaDeviceActions.clear();
}
// We avoid creating host action in device-only mode.
return CompileDeviceOnly ? ABRT_Ignore_Host : ABRT_Success;
}
assert(CurPhase < phases::Backend && "Generating single CUDA "
"instructions should only occur "
"before the backend phase!");
// By default, we produce an action for each device arch.
for (Action *&A : CudaDeviceActions)
A = C.getDriver().ConstructPhaseAction(C, Args, CurPhase, A);
return ABRT_Success;
}
ActionBuilderReturnCode addDeviceDepences(Action *HostAction) override {
// While generating code for CUDA, we only depend on the host input action
// to trigger the creation of all the CUDA device actions.
// If we are dealing with an input action, replicate it for each GPU
// architecture. If we are in host-only mode we return 'success' so that
// the host uses the CUDA offload kind.
if (auto *IA = dyn_cast<InputAction>(HostAction)) {
assert(!GpuArchList.empty() &&
"We should have at least one GPU architecture.");
// If the host input is not CUDA, we don't need to bother about this
// input.
if (IA->getType() != types::TY_CUDA) {
// The builder will ignore this input.
IsActive = false;
return ABRT_Inactive;
}
// Set the flag to true, so that the builder acts on the current input.
IsActive = true;
if (CompileHostOnly)
return ABRT_Success;
// Replicate inputs for each GPU architecture.
for (unsigned I = 0, E = GpuArchList.size(); I != E; ++I)
CudaDeviceActions.push_back(C.MakeAction<InputAction>(
IA->getInputArg(), types::TY_CUDA_DEVICE));
return ABRT_Success;
}
return IsActive ? ABRT_Success : ABRT_Inactive;
}
void appendTopLevelActions(ActionList &AL) override {
// Utility to append actions to the top level list.
auto AddTopLevel = [&](Action *A, CudaArch BoundArch) {
OffloadAction::DeviceDependences Dep;
Dep.add(*A, *ToolChains.front(), CudaArchToString(BoundArch),
Action::OFK_Cuda);
AL.push_back(C.MakeAction<OffloadAction>(Dep, A->getType()));
};
// If we have a fat binary, add it to the list.
if (CudaFatBinary) {
AddTopLevel(CudaFatBinary, CudaArch::UNKNOWN);
CudaDeviceActions.clear();
CudaFatBinary = nullptr;
return;
}
if (CudaDeviceActions.empty())
return;
      // If we have CUDA actions at this point, it is because we have a
      // partial compilation, so we should have an action for each GPU
      // architecture.
assert(CudaDeviceActions.size() == GpuArchList.size() &&
"Expecting one action per GPU architecture.");
      assert(ToolChains.size() == 1 &&
             "Expecting to have a single CUDA toolchain.");
for (unsigned I = 0, E = GpuArchList.size(); I != E; ++I)
AddTopLevel(CudaDeviceActions[I], GpuArchList[I]);
CudaDeviceActions.clear();
}
bool initialize() override {
      // There is nothing to do if the compilation is not using CUDA offloading.
if (!C.hasOffloadToolChain<Action::OFK_Cuda>())
return false;
const ToolChain *HostTC = C.getSingleOffloadToolChain<Action::OFK_Host>();
assert(HostTC && "No toolchain for host compilation.");
if (HostTC->getTriple().isNVPTX()) {
// We do not support targeting NVPTX for host compilation. Throw
// an error and abort pipeline construction early so we don't trip
// asserts that assume device-side compilation.
C.getDriver().Diag(diag::err_drv_cuda_nvptx_host);
return true;
}
ToolChains.push_back(C.getSingleOffloadToolChain<Action::OFK_Cuda>());
Arg *PartialCompilationArg = Args.getLastArg(
options::OPT_cuda_host_only, options::OPT_cuda_device_only,
options::OPT_cuda_compile_host_device);
CompileHostOnly = PartialCompilationArg &&
PartialCompilationArg->getOption().matches(
options::OPT_cuda_host_only);
CompileDeviceOnly = PartialCompilationArg &&
PartialCompilationArg->getOption().matches(
options::OPT_cuda_device_only);
// Collect all cuda_gpu_arch parameters, removing duplicates.
llvm::SmallSet<CudaArch, 4> GpuArchs;
bool Error = false;
for (Arg *A : Args) {
if (!A->getOption().matches(options::OPT_cuda_gpu_arch_EQ))
continue;
A->claim();
const auto &ArchStr = A->getValue();
CudaArch Arch = StringToCudaArch(ArchStr);
if (Arch == CudaArch::UNKNOWN) {
C.getDriver().Diag(clang::diag::err_drv_cuda_bad_gpu_arch) << ArchStr;
Error = true;
} else if (GpuArchs.insert(Arch).second)
GpuArchList.push_back(Arch);
}
// Default to sm_20 which is the lowest common denominator for supported
// GPUs.
// sm_20 code should work correctly, if suboptimally, on all newer GPUs.
if (GpuArchList.empty())
GpuArchList.push_back(CudaArch::SM_20);
return Error;
}
};
/// Add the implementation for other specialized builders here.
/// Specialized builders being used by this offloading action builder.
SmallVector<DeviceActionBuilder *, 4> SpecializedBuilders;
public:
OffloadingActionBuilder(Compilation &C, DerivedArgList &Args,
const Driver::InputList &Inputs)
: C(C), Args(Args) {
// Create a specialized builder for each device toolchain.
IsValid = true;
// Create a specialized builder for CUDA.
SpecializedBuilders.push_back(new CudaActionBuilder(C, Args, Inputs));
//
// TODO: Build other specialized builders here.
//
// Initialize all the builders, keeping track of errors.
for (auto *SB : SpecializedBuilders)
IsValid = IsValid && !SB->initialize();
}
~OffloadingActionBuilder() {
for (auto *SB : SpecializedBuilders)
delete SB;
}
/// Generate an action that adds device dependences (if any) to a host action.
/// If no device dependence actions exist, just return the host action \a
/// HostAction. If an error is found or if no builder requires the host action
/// to be generated, return nullptr.
Action *
addDeviceDependencesToHostAction(Action *HostAction, const Arg *InputArg,
phases::ID CurPhase, phases::ID FinalPhase,
DeviceActionBuilder::PhasesTy &Phases) {
if (!IsValid)
return nullptr;
if (SpecializedBuilders.empty())
return HostAction;
assert(HostAction && "Invalid host action!");
OffloadAction::DeviceDependences DDeps;
// Check if all the programming models agree we should not emit the host
// action. Also, keep track of the offloading kinds employed.
auto &OffloadKind = InputArgToOffloadKindMap[InputArg];
unsigned InactiveBuilders = 0u;
unsigned IgnoringBuilders = 0u;
for (auto *SB : SpecializedBuilders) {
if (!SB->isValid()) {
++InactiveBuilders;
continue;
}
auto RetCode = SB->getDeviceDepences(DDeps, CurPhase, FinalPhase, Phases);
// If the builder explicitly says the host action should be ignored,
// we need to increment the variable that tracks the builders that request
// the host object to be ignored.
if (RetCode == DeviceActionBuilder::ABRT_Ignore_Host)
++IgnoringBuilders;
// Unless the builder was inactive for this action, we have to record the
// offload kind because the host will have to use it.
if (RetCode != DeviceActionBuilder::ABRT_Inactive)
OffloadKind |= SB->getAssociatedOffloadKind();
}
// If all builders agree that the host object should be ignored, just return
// nullptr.
if (IgnoringBuilders &&
SpecializedBuilders.size() == (InactiveBuilders + IgnoringBuilders))
return nullptr;
if (DDeps.getActions().empty())
return HostAction;
// We have dependences we need to bundle together. We use an offload action
// for that.
OffloadAction::HostDependence HDep(
*HostAction, *C.getSingleOffloadToolChain<Action::OFK_Host>(),
/*BoundArch=*/nullptr, DDeps);
return C.MakeAction<OffloadAction>(HDep, DDeps);
}
/// Generate an action that adds a host dependence to a device action. The
/// results will be kept in this action builder. Return true if an error was
/// found.
bool addHostDependenceToDeviceActions(Action *HostAction,
const Arg *InputArg) {
if (!IsValid)
return true;
assert(HostAction && "Invalid host action!");
// Register the offload kinds that are used.
auto &OffloadKind = InputArgToOffloadKindMap[InputArg];
for (auto *SB : SpecializedBuilders) {
if (!SB->isValid())
continue;
auto RetCode = SB->addDeviceDepences(HostAction);
// Host dependences for device actions are not compatible with that same
// action being ignored.
      assert(RetCode != DeviceActionBuilder::ABRT_Ignore_Host &&
             "Host dependence not expected to be ignored!");
// Unless the builder was inactive for this action, we have to record the
// offload kind because the host will have to use it.
if (RetCode != DeviceActionBuilder::ABRT_Inactive)
OffloadKind |= SB->getAssociatedOffloadKind();
}
return false;
}
/// Add the offloading top level actions to the provided action list.
bool appendTopLevelActions(ActionList &AL, Action *HostAction,
const Arg *InputArg) {
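    // Remember how many actions were already in the list so we can tell below
    // whether the builders appended any new top-level actions.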
auto NumActions = AL.size();
for (auto *SB : SpecializedBuilders) {
if (!SB->isValid())
continue;
SB->appendTopLevelActions(AL);
}
assert(NumActions <= AL.size() && "Expecting more actions, not less!");
// Propagate to the current host action (if any) the offload information
// associated with the current input.
if (HostAction)
HostAction->propagateHostOffloadInfo(InputArgToOffloadKindMap[InputArg],
/*BoundArch=*/nullptr);
// If any action is added by the builders, -o is ambiguous if we have more
// than one top-level action.
if (NumActions < AL.size() && Args.hasArg(options::OPT_o) &&
AL.size() > 1) {
C.getDriver().Diag(
clang::diag::err_drv_output_argument_with_multiple_files);
return true;
}
return false;
}
  /// Processes the host linker action. This currently consists of replacing it
  /// with an offload action if there are device link objects and propagating
  /// to the host action all the offload kinds used in the current compilation.
  /// The resulting action is returned.
Action *processHostLinkAction(Action *HostAction) {
// Add all the dependences from the device linking actions.
OffloadAction::DeviceDependences DDeps;
for (auto *SB : SpecializedBuilders) {
if (!SB->isValid())
continue;
SB->appendLinkDependences(DDeps);
}
// Calculate all the offload kinds used in the current compilation.
unsigned ActiveOffloadKinds = 0u;
for (auto &I : InputArgToOffloadKindMap)
ActiveOffloadKinds |= I.second;
// If we don't have device dependencies, we don't have to create an offload
// action.
if (DDeps.getActions().empty()) {
// Propagate all the active kinds to host action. Given that it is a link
// action it is assumed to depend on all actions generated so far.
HostAction->propagateHostOffloadInfo(ActiveOffloadKinds,
/*BoundArch=*/nullptr);
return HostAction;
}
// Create the offload action with all dependences. When an offload action
// is created the kinds are propagated to the host action, so we don't have
    // to do that explicitly here.
OffloadAction::HostDependence HDep(
*HostAction, *C.getSingleOffloadToolChain<Action::OFK_Host>(),
/*BoundArch*/ nullptr, ActiveOffloadKinds);
return C.MakeAction<OffloadAction>(HDep, DDeps);
}
};
} // anonymous namespace.
void Driver::BuildActions(Compilation &C, DerivedArgList &Args,
const InputList &Inputs, ActionList &Actions) const {
llvm::PrettyStackTraceString CrashInfo("Building compilation actions");
if (!SuppressMissingInputWarning && Inputs.empty()) {
Diag(clang::diag::err_drv_no_input_files);
return;
}
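  // Compute the last phase requested on the command line (preprocess, compile,
  // backend, assemble or link); it bounds the pipeline built for each input.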
Arg *FinalPhaseArg;
phases::ID FinalPhase = getFinalPhase(Args, &FinalPhaseArg);
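  // -emit-llvm is not supported when the final phase is linking.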
if (FinalPhase == phases::Link && Args.hasArg(options::OPT_emit_llvm)) {
Diag(clang::diag::err_drv_emit_llvm_link);
}
// Reject -Z* at the top level, these options should never have been exposed
// by gcc.
if (Arg *A = Args.getLastArg(options::OPT_Z_Joined))
Diag(clang::diag::err_drv_use_of_Z_option) << A->getAsString(Args);
// Diagnose misuse of /Fo.
if (Arg *A = Args.getLastArg(options::OPT__SLASH_Fo)) {
StringRef V = A->getValue();
if (Inputs.size() > 1 && !V.empty() &&
!llvm::sys::path::is_separator(V.back())) {
// Check whether /Fo tries to name an output file for multiple inputs.
Diag(clang::diag::err_drv_out_file_argument_with_multiple_sources)
<< A->getSpelling() << V;
Args.eraseArg(options::OPT__SLASH_Fo);
}
}
// Diagnose misuse of /Fa.
if (Arg *A = Args.getLastArg(options::OPT__SLASH_Fa)) {
StringRef V = A->getValue();
if (Inputs.size() > 1 && !V.empty() &&
!llvm::sys::path::is_separator(V.back())) {
// Check whether /Fa tries to name an asm file for multiple inputs.
Diag(clang::diag::err_drv_out_file_argument_with_multiple_sources)
<< A->getSpelling() << V;
Args.eraseArg(options::OPT__SLASH_Fa);
}
}
// Diagnose misuse of /o.
if (Arg *A = Args.getLastArg(options::OPT__SLASH_o)) {
if (A->getValue()[0] == '\0') {
// It has to have a value.
Diag(clang::diag::err_drv_missing_argument) << A->getSpelling() << 1;
Args.eraseArg(options::OPT__SLASH_o);
}
}
// Diagnose unsupported forms of /Yc /Yu. Ignore /Yc/Yu for now if:
// * no filename after it
// * both /Yc and /Yu passed but with different filenames
// * corresponding file not also passed as /FI
Arg *YcArg = Args.getLastArg(options::OPT__SLASH_Yc);
Arg *YuArg = Args.getLastArg(options::OPT__SLASH_Yu);
if (YcArg && YcArg->getValue()[0] == '\0') {
Diag(clang::diag::warn_drv_ycyu_no_arg_clang_cl) << YcArg->getSpelling();
Args.eraseArg(options::OPT__SLASH_Yc);
YcArg = nullptr;
}
if (YuArg && YuArg->getValue()[0] == '\0') {
Diag(clang::diag::warn_drv_ycyu_no_arg_clang_cl) << YuArg->getSpelling();
Args.eraseArg(options::OPT__SLASH_Yu);
YuArg = nullptr;
}
if (YcArg && YuArg && strcmp(YcArg->getValue(), YuArg->getValue()) != 0) {
Diag(clang::diag::warn_drv_ycyu_different_arg_clang_cl);
Args.eraseArg(options::OPT__SLASH_Yc);
Args.eraseArg(options::OPT__SLASH_Yu);
YcArg = YuArg = nullptr;
}
if (YcArg || YuArg) {
StringRef Val = YcArg ? YcArg->getValue() : YuArg->getValue();
bool FoundMatchingInclude = false;
for (const Arg *Inc : Args.filtered(options::OPT_include)) {
// FIXME: Do case-insensitive matching and consider / and \ as equal.
if (Inc->getValue() == Val)
FoundMatchingInclude = true;
}
if (!FoundMatchingInclude) {
Diag(clang::diag::warn_drv_ycyu_no_fi_arg_clang_cl)
<< (YcArg ? YcArg : YuArg)->getSpelling();
Args.eraseArg(options::OPT__SLASH_Yc);
Args.eraseArg(options::OPT__SLASH_Yu);
YcArg = YuArg = nullptr;
}
}
if (YcArg && Inputs.size() > 1) {
Diag(clang::diag::warn_drv_yc_multiple_inputs_clang_cl);
Args.eraseArg(options::OPT__SLASH_Yc);
YcArg = nullptr;
}
if (Args.hasArg(options::OPT__SLASH_Y_)) {
// /Y- disables all pch handling. Rather than check for it everywhere,
// just remove clang-cl pch-related flags here.
Args.eraseArg(options::OPT__SLASH_Fp);
Args.eraseArg(options::OPT__SLASH_Yc);
Args.eraseArg(options::OPT__SLASH_Yu);
YcArg = YuArg = nullptr;
}
// Builder to be used to build offloading actions.
OffloadingActionBuilder OffloadBuilder(C, Args, Inputs);
// Construct the actions to perform.
ActionList LinkerInputs;
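  // Phase list for the input currently being processed; recomputed for every
  // input.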
llvm::SmallVector<phases::ID, phases::MaxNumberOfPhases> PL;
for (auto &I : Inputs) {
types::ID InputType = I.first;
const Arg *InputArg = I.second;
PL.clear();
types::getCompilationPhases(InputType, PL);
// If the first step comes after the final phase we are doing as part of
// this compilation, warn the user about it.
phases::ID InitialPhase = PL[0];
if (InitialPhase > FinalPhase) {
// Claim here to avoid the more general unused warning.
InputArg->claim();
// Suppress all unused style warnings with -Qunused-arguments
if (Args.hasArg(options::OPT_Qunused_arguments))
continue;
// Special case when final phase determined by binary name, rather than
// by a command-line argument with a corresponding Arg.
if (CCCIsCPP())
Diag(clang::diag::warn_drv_input_file_unused_by_cpp)
<< InputArg->getAsString(Args) << getPhaseName(InitialPhase);
// Special case '-E' warning on a previously preprocessed file to make
// more sense.
else if (InitialPhase == phases::Compile &&
FinalPhase == phases::Preprocess &&
getPreprocessedType(InputType) == types::TY_INVALID)
Diag(clang::diag::warn_drv_preprocessed_input_file_unused)
<< InputArg->getAsString(Args) << !!FinalPhaseArg
<< (FinalPhaseArg ? FinalPhaseArg->getOption().getName() : "");
else
Diag(clang::diag::warn_drv_input_file_unused)
<< InputArg->getAsString(Args) << getPhaseName(InitialPhase)
<< !!FinalPhaseArg
<< (FinalPhaseArg ? FinalPhaseArg->getOption().getName() : "");
continue;
}
if (YcArg) {
// Add a separate precompile phase for the compile phase.
if (FinalPhase >= phases::Compile) {
const types::ID HeaderType = lookupHeaderTypeForSourceType(InputType);
llvm::SmallVector<phases::ID, phases::MaxNumberOfPhases> PCHPL;
types::getCompilationPhases(HeaderType, PCHPL);
Arg *PchInputArg = MakeInputArg(Args, Opts, YcArg->getValue());
// Build the pipeline for the pch file.
Action *ClangClPch =
C.MakeAction<InputAction>(*PchInputArg, HeaderType);
for (phases::ID Phase : PCHPL)
ClangClPch = ConstructPhaseAction(C, Args, Phase, ClangClPch);
assert(ClangClPch);
Actions.push_back(ClangClPch);
        // The driver currently exits after the first failed command. This
        // relies on that behavior to make sure that, if the PCH generation
        // fails, the main compilation won't run.
}
}
// Build the pipeline for this file.
Action *Current = C.MakeAction<InputAction>(*InputArg, InputType);
// Use the current host action in any of the offloading actions, if
// required.
if (OffloadBuilder.addHostDependenceToDeviceActions(Current, InputArg))
break;
for (SmallVectorImpl<phases::ID>::iterator i = PL.begin(), e = PL.end();
i != e; ++i) {
phases::ID Phase = *i;
// We are done if this step is past what the user requested.
if (Phase > FinalPhase)
break;
// Add any offload action the host action depends on.
Current = OffloadBuilder.addDeviceDependencesToHostAction(
Current, InputArg, Phase, FinalPhase, PL);
if (!Current)
break;
// Queue linker inputs.
if (Phase == phases::Link) {
assert((i + 1) == e && "linking must be final compilation step.");
LinkerInputs.push_back(Current);
Current = nullptr;
break;
}
// Otherwise construct the appropriate action.
auto *NewCurrent = ConstructPhaseAction(C, Args, Phase, Current);
// We didn't create a new action, so we will just move to the next phase.
if (NewCurrent == Current)
continue;
Current = NewCurrent;
// Use the current host action in any of the offloading actions, if
// required.
if (OffloadBuilder.addHostDependenceToDeviceActions(Current, InputArg))
break;
if (Current->getType() == types::TY_Nothing)
break;
}
// If we ended with something, add to the output list.
if (Current)
Actions.push_back(Current);
// Add any top level actions generated for offloading.
OffloadBuilder.appendTopLevelActions(Actions, Current, InputArg);
}
// Add a link action if necessary.
if (!LinkerInputs.empty()) {
Action *LA = C.MakeAction<LinkJobAction>(LinkerInputs, types::TY_Image);
LA = OffloadBuilder.processHostLinkAction(LA);
Actions.push_back(LA);
}
// If we are linking, claim any options which are obviously only used for
// compilation.
if (FinalPhase == phases::Link && PL.size() == 1) {
Args.ClaimAllArgs(options::OPT_CompileOnly_Group);
Args.ClaimAllArgs(options::OPT_cl_compile_Group);
}
// Claim ignored clang-cl options.
Args.ClaimAllArgs(options::OPT_cl_ignored_Group);
// Claim --cuda-host-only and --cuda-compile-host-device, which may be passed
// to non-CUDA compilations and should not trigger warnings there.
Args.ClaimAllArgs(options::OPT_cuda_host_only);
Args.ClaimAllArgs(options::OPT_cuda_compile_host_device);
}
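// Create the action for a single compilation phase, given the action that
// produces its input. A phase that does not apply to the input type (e.g.
// assembling non-assembly input) returns the input action unchanged.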
Action *Driver::ConstructPhaseAction(Compilation &C, const ArgList &Args,
phases::ID Phase, Action *Input) const {
llvm::PrettyStackTraceString CrashInfo("Constructing phase actions");
// Some types skip the assembler phase (e.g., llvm-bc), but we can't
// encode this in the steps because the intermediate type depends on
  // arguments. Just special-case it here.
if (Phase == phases::Assemble && Input->getType() != types::TY_PP_Asm)
return Input;
// Build the appropriate action.
switch (Phase) {
case phases::Link:
llvm_unreachable("link action invalid here.");
case phases::Preprocess: {
types::ID OutputTy;
// -{M, MM} alter the output type.
if (Args.hasArg(options::OPT_M, options::OPT_MM)) {
OutputTy = types::TY_Dependencies;
} else {
OutputTy = Input->getType();
if (!Args.hasFlag(options::OPT_frewrite_includes,
options::OPT_fno_rewrite_includes, false) &&
!CCGenDiagnostics)
OutputTy = types::getPreprocessedType(OutputTy);
assert(OutputTy != types::TY_INVALID &&
"Cannot preprocess this input type!");
}
return C.MakeAction<PreprocessJobAction>(Input, OutputTy);
}
case phases::Precompile: {
types::ID OutputTy = getPrecompiledType(Input->getType());
assert(OutputTy != types::TY_INVALID &&
"Cannot precompile this input type!");
if (Args.hasArg(options::OPT_fsyntax_only)) {
// Syntax checks should not emit a PCH file
OutputTy = types::TY_Nothing;
}
return C.MakeAction<PrecompileJobAction>(Input, OutputTy);
}
case phases::Compile: {
if (Args.hasArg(options::OPT_fsyntax_only))
return C.MakeAction<CompileJobAction>(Input, types::TY_Nothing);
if (Args.hasArg(options::OPT_rewrite_objc))
return C.MakeAction<CompileJobAction>(Input, types::TY_RewrittenObjC);
if (Args.hasArg(options::OPT_rewrite_legacy_objc))
return C.MakeAction<CompileJobAction>(Input,
types::TY_RewrittenLegacyObjC);
if (Args.hasArg(options::OPT__analyze, options::OPT__analyze_auto))
return C.MakeAction<AnalyzeJobAction>(Input, types::TY_Plist);
if (Args.hasArg(options::OPT__migrate))
return C.MakeAction<MigrateJobAction>(Input, types::TY_Remap);
if (Args.hasArg(options::OPT_emit_ast))
return C.MakeAction<CompileJobAction>(Input, types::TY_AST);
if (Args.hasArg(options::OPT_module_file_info))
return C.MakeAction<CompileJobAction>(Input, types::TY_ModuleFile);
if (Args.hasArg(options::OPT_verify_pch))
return C.MakeAction<VerifyPCHJobAction>(Input, types::TY_Nothing);
return C.MakeAction<CompileJobAction>(Input, types::TY_LLVM_BC);
}
case phases::Backend: {
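    // With LTO enabled, the backend stops at LTO-ready IR; with -emit-llvm it
    // stops at plain IR; otherwise it lowers to textual assembly for the
    // assembler stage. -S selects the textual rather than bitcode form of the
    // IR output.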
if (isUsingLTO()) {
types::ID Output =
Args.hasArg(options::OPT_S) ? types::TY_LTO_IR : types::TY_LTO_BC;
return C.MakeAction<BackendJobAction>(Input, Output);
}
if (Args.hasArg(options::OPT_emit_llvm)) {
types::ID Output =
Args.hasArg(options::OPT_S) ? types::TY_LLVM_IR : types::TY_LLVM_BC;
return C.MakeAction<BackendJobAction>(Input, Output);
}
return C.MakeAction<BackendJobAction>(Input, types::TY_PP_Asm);
}
case phases::Assemble:
    return C.MakeAction<AssembleJobAction>(Input, types::TY_Object);
}
llvm_unreachable("invalid phase in ConstructPhaseAction");
}
void Driver::BuildJobs(Compilation &C) const {
llvm::PrettyStackTraceString CrashInfo("Building compilation jobs");
Arg *FinalOutput = C.getArgs().getLastArg(options::OPT_o);
// It is an error to provide a -o option if we are making multiple output
// files.
if (FinalOutput) {
unsigned NumOutputs = 0;
for (const Action *A : C.getActions())
if (A->getType() != types::TY_Nothing)
++NumOutputs;
if (NumOutputs > 1) {
Diag(clang::diag::err_drv_output_argument_with_multiple_files);
FinalOutput = nullptr;
}
}
// Collect the list of architectures.
llvm::StringSet<> ArchNames;
if (C.getDefaultToolChain().getTriple().isOSBinFormatMachO())
for (const Arg *A : C.getArgs())
if (A->getOption().matches(options::OPT_arch))
ArchNames.insert(A->getValue());
// Set of (Action, canonical ToolChain triple) pairs we've built jobs for.
std::map<std::pair<const Action *, std::string>, InputInfo> CachedResults;
for (Action *A : C.getActions()) {
// If we are linking an image for multiple archs then the linker wants
// -arch_multiple and -final_output <final image name>. Unfortunately, this
// doesn't fit in cleanly because we have to pass this information down.
//
// FIXME: This is a hack; find a cleaner way to integrate this into the
// process.
const char *LinkingOutput = nullptr;
if (isa<LipoJobAction>(A)) {
if (FinalOutput)
LinkingOutput = FinalOutput->getValue();
else
LinkingOutput = getDefaultImageName();
}
BuildJobsForAction(C, A, &C.getDefaultToolChain(),
/*BoundArch*/ StringRef(),
/*AtTopLevel*/ true,
/*MultipleArchs*/ ArchNames.size() > 1,
/*LinkingOutput*/ LinkingOutput, CachedResults,
/*BuildForOffloadDevice*/ false);
}
// If the user passed -Qunused-arguments or there were errors, don't warn
// about any unused arguments.
if (Diags.hasErrorOccurred() ||
C.getArgs().hasArg(options::OPT_Qunused_arguments))
return;
// Claim -### here.
(void)C.getArgs().hasArg(options::OPT__HASH_HASH_HASH);
// Claim --driver-mode, --rsp-quoting, it was handled earlier.
(void)C.getArgs().hasArg(options::OPT_driver_mode);
(void)C.getArgs().hasArg(options::OPT_rsp_quoting);
for (Arg *A : C.getArgs()) {
// FIXME: It would be nice to be able to send the argument to the
// DiagnosticsEngine, so that extra values, position, and so on could be
// printed.
if (!A->isClaimed()) {
if (A->getOption().hasFlag(options::NoArgumentUnused))
continue;
// Suppress the warning automatically if this is just a flag, and it is an
// instance of an argument we already claimed.
const Option &Opt = A->getOption();
if (Opt.getKind() == Option::FlagClass) {
bool DuplicateClaimed = false;
for (const Arg *AA : C.getArgs().filtered(&Opt)) {
if (AA->isClaimed()) {
DuplicateClaimed = true;
break;
}
}
if (DuplicateClaimed)
continue;
}
// In clang-cl, don't mention unknown arguments here since they have
// already been warned about.
if (!IsCLMode() || !A->getOption().matches(options::OPT_UNKNOWN))
Diag(clang::diag::warn_drv_unused_argument)
<< A->getAsString(C.getArgs());
}
}
}
/// Collapse an offloading action looking for a job of the given type. The
/// input action is changed to the input of the collapsed sequence. If a
/// collapse actually occurred, return the corresponding offloading action;
/// otherwise return null.
template <typename T>
static OffloadAction *collapseOffloadingAction(Action *&CurAction) {
if (!CurAction)
return nullptr;
if (auto *OA = dyn_cast<OffloadAction>(CurAction)) {
if (OA->hasHostDependence())
if (auto *HDep = dyn_cast<T>(OA->getHostDependence())) {
CurAction = HDep;
return OA;
}
if (OA->hasSingleDeviceDependence())
if (auto *DDep = dyn_cast<T>(OA->getSingleDeviceDependence())) {
CurAction = DDep;
return OA;
}
}
return nullptr;
}
// Returns a Tool for a given JobAction. In case the action and its
// predecessors can be combined, updates Inputs with the inputs of the
// first combined action. If any of the collapsed actions are offload
// actions, appends them to CollapsedOffloadAction so the caller can deal
// with the extra handling such actions require.
static const Tool *selectToolForJob(Compilation &C, bool SaveTemps,
bool EmbedBitcode, const ToolChain *TC,
const JobAction *JA,
const ActionList *&Inputs,
ActionList &CollapsedOffloadAction) {
const Tool *ToolForJob = nullptr;
CollapsedOffloadAction.clear();
// See if we should look for a compiler with an integrated assembler. We match
// bottom up, so what we are actually looking for is an assembler job with a
// compiler input.
// Look through offload actions between assembler and backend actions.
Action *BackendJA = (isa<AssembleJobAction>(JA) && Inputs->size() == 1)
? *Inputs->begin()
: nullptr;
auto *BackendOA = collapseOffloadingAction<BackendJobAction>(BackendJA);
if (TC->useIntegratedAs() && !SaveTemps &&
!C.getArgs().hasArg(options::OPT_via_file_asm) &&
!C.getArgs().hasArg(options::OPT__SLASH_FA) &&
!C.getArgs().hasArg(options::OPT__SLASH_Fa) && BackendJA &&
isa<BackendJobAction>(BackendJA)) {
// A BackendJob is always preceded by a CompileJob, and without -save-temps
// or -fembed-bitcode, they will always get combined together, so instead of
// checking the backend tool, check if the tool for the CompileJob has an
// integrated assembler. For -fembed-bitcode, CompileJob is still used to
// look up tools for BackendJob, but they need to match before we can split
// them.
// Look through offload actions between backend and compile actions.
Action *CompileJA = *BackendJA->getInputs().begin();
auto *CompileOA = collapseOffloadingAction<CompileJobAction>(CompileJA);
assert(CompileJA && isa<CompileJobAction>(CompileJA) &&
"Backend job is not preceeded by compile job.");
const Tool *Compiler = TC->SelectTool(*cast<CompileJobAction>(CompileJA));
if (!Compiler)
return nullptr;
    // When using -fembed-bitcode, it is required to have the same tool (clang)
    // for both CompileJA and BackendJA. Otherwise, combine the two stages.
if (EmbedBitcode) {
JobAction *InputJA = cast<JobAction>(*Inputs->begin());
const Tool *BackendTool = TC->SelectTool(*InputJA);
if (BackendTool == Compiler)
CompileJA = InputJA;
}
if (Compiler->hasIntegratedAssembler()) {
Inputs = &CompileJA->getInputs();
ToolForJob = Compiler;
// Save the collapsed offload actions because they may still contain
// device actions.
if (CompileOA)
CollapsedOffloadAction.push_back(CompileOA);
if (BackendOA)
CollapsedOffloadAction.push_back(BackendOA);
}
}
// A backend job should always be combined with the preceding compile job
// unless OPT_save_temps or OPT_fembed_bitcode is enabled and the compiler is
// capable of emitting LLVM IR as an intermediate output.
if (isa<BackendJobAction>(JA)) {
// Check if the compiler supports emitting LLVM IR.
assert(Inputs->size() == 1);
// Look through offload actions between backend and compile actions.
Action *CompileJA = *JA->getInputs().begin();
auto *CompileOA = collapseOffloadingAction<CompileJobAction>(CompileJA);
assert(CompileJA && isa<CompileJobAction>(CompileJA) &&
"Backend job is not preceeded by compile job.");
const Tool *Compiler = TC->SelectTool(*cast<CompileJobAction>(CompileJA));
if (!Compiler)
return nullptr;
if (!Compiler->canEmitIR() ||
(!SaveTemps && !EmbedBitcode)) {
Inputs = &CompileJA->getInputs();
ToolForJob = Compiler;
if (CompileOA)
CollapsedOffloadAction.push_back(CompileOA);
}
}
// Otherwise use the tool for the current job.
if (!ToolForJob)
ToolForJob = TC->SelectTool(*JA);
// See if we should use an integrated preprocessor. We do so when we have
// exactly one input, since this is the only use case we care about
// (irrelevant since we don't support combine yet).
// Look through offload actions after preprocessing.
Action *PreprocessJA = (Inputs->size() == 1) ? *Inputs->begin() : nullptr;
auto *PreprocessOA =
collapseOffloadingAction<PreprocessJobAction>(PreprocessJA);
if (PreprocessJA && isa<PreprocessJobAction>(PreprocessJA) &&
!C.getArgs().hasArg(options::OPT_no_integrated_cpp) &&
!C.getArgs().hasArg(options::OPT_traditional_cpp) && !SaveTemps &&
!C.getArgs().hasArg(options::OPT_rewrite_objc) &&
ToolForJob->hasIntegratedCPP()) {
Inputs = &PreprocessJA->getInputs();
if (PreprocessOA)
CollapsedOffloadAction.push_back(PreprocessOA);
}
return ToolForJob;
}
InputInfo Driver::BuildJobsForAction(
Compilation &C, const Action *A, const ToolChain *TC, StringRef BoundArch,
bool AtTopLevel, bool MultipleArchs, const char *LinkingOutput,
std::map<std::pair<const Action *, std::string>, InputInfo> &CachedResults,
bool BuildForOffloadDevice) const {
// The bound arch is not necessarily represented in the toolchain's triple --
// for example, armv7 and armv7s both map to the same triple -- so we need
// both in our map.
std::string TriplePlusArch = TC->getTriple().normalize();
if (!BoundArch.empty()) {
TriplePlusArch += "-";
TriplePlusArch += BoundArch;
}
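  // Return a cached result if we have already built jobs for this action with
  // this toolchain and bound architecture, so that shared subgraphs are only
  // translated into jobs once.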
std::pair<const Action *, std::string> ActionTC = {A, TriplePlusArch};
auto CachedResult = CachedResults.find(ActionTC);
if (CachedResult != CachedResults.end()) {
return CachedResult->second;
}
InputInfo Result = BuildJobsForActionNoCache(
C, A, TC, BoundArch, AtTopLevel, MultipleArchs, LinkingOutput,
CachedResults, BuildForOffloadDevice);
CachedResults[ActionTC] = Result;
return Result;
}
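// Does the real work of BuildJobsForAction; the caller is responsible for
// caching the result.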
InputInfo Driver::BuildJobsForActionNoCache(
Compilation &C, const Action *A, const ToolChain *TC, StringRef BoundArch,
bool AtTopLevel, bool MultipleArchs, const char *LinkingOutput,
std::map<std::pair<const Action *, std::string>, InputInfo> &CachedResults,
bool BuildForOffloadDevice) const {
llvm::PrettyStackTraceString CrashInfo("Building compilation jobs");
InputInfoList OffloadDependencesInputInfo;
if (const OffloadAction *OA = dyn_cast<OffloadAction>(A)) {
// The offload action is expected to be used in four different situations.
//
// a) Set a toolchain/architecture/kind for a host action:
// Host Action 1 -> OffloadAction -> Host Action 2
//
// b) Set a toolchain/architecture/kind for a device action;
// Device Action 1 -> OffloadAction -> Device Action 2
//
// c) Specify a device dependence to a host action;
// Device Action 1 _
// \
// Host Action 1 ---> OffloadAction -> Host Action 2
//
// d) Specify a host dependence to a device action.
// Host Action 1 _
// \
// Device Action 1 ---> OffloadAction -> Device Action 2
//
// For a) and b), we just return the job generated for the dependence. For
// c) and d) we override the current action with the host/device dependence
// if the current toolchain is host/device and set the offload dependences
// info with the jobs obtained from the device/host dependence(s).
// If there is a single device option, just generate the job for it.
if (OA->hasSingleDeviceDependence()) {
InputInfo DevA;
OA->doOnEachDeviceDependence([&](Action *DepA, const ToolChain *DepTC,
const char *DepBoundArch) {
DevA =
BuildJobsForAction(C, DepA, DepTC, DepBoundArch, AtTopLevel,
/*MultipleArchs*/ !!DepBoundArch, LinkingOutput,
CachedResults, /*BuildForOffloadDevice=*/true);
});
return DevA;
}
// If 'Action 2' is a host action, we generate jobs for the device
// dependences and override the current action with the host dependence.
// Otherwise, we generate jobs for the host dependences and override the
// action with the device dependence. The dependences therefore can never
// be top-level actions.
OA->doOnEachDependence(
/*IsHostDependence=*/BuildForOffloadDevice,
[&](Action *DepA, const ToolChain *DepTC, const char *DepBoundArch) {
OffloadDependencesInputInfo.push_back(BuildJobsForAction(
C, DepA, DepTC, DepBoundArch, /*AtTopLevel=*/false,
/*MultipleArchs*/ !!DepBoundArch, LinkingOutput, CachedResults,
/*BuildForOffloadDevice=*/DepA->getOffloadingDeviceKind() !=
Action::OFK_None));
});
A = BuildForOffloadDevice
? OA->getSingleDeviceDependence(/*DoNotConsiderHostActions=*/true)
: OA->getHostDependence();
}
if (const InputAction *IA = dyn_cast<InputAction>(A)) {
// FIXME: It would be nice to not claim this here; maybe the old scheme of
// just using Args was better?
const Arg &Input = IA->getInputArg();
Input.claim();
if (Input.getOption().matches(options::OPT_INPUT)) {
const char *Name = Input.getValue();
return InputInfo(A, Name, /* BaseInput = */ Name);
}
return InputInfo(A, &Input, /* BaseInput = */ "");
}
if (const BindArchAction *BAA = dyn_cast<BindArchAction>(A)) {
const ToolChain *TC;
StringRef ArchName = BAA->getArchName();
if (!ArchName.empty())
TC = &getToolChain(C.getArgs(),
computeTargetTriple(*this, DefaultTargetTriple,
C.getArgs(), ArchName));
else
TC = &C.getDefaultToolChain();
return BuildJobsForAction(C, *BAA->input_begin(), TC, ArchName, AtTopLevel,
MultipleArchs, LinkingOutput, CachedResults,
BuildForOffloadDevice);
}
const ActionList *Inputs = &A->getInputs();
const JobAction *JA = cast<JobAction>(A);
ActionList CollapsedOffloadActions;
const Tool *T =
selectToolForJob(C, isSaveTempsEnabled(), embedBitcodeEnabled(), TC, JA,
Inputs, CollapsedOffloadActions);
if (!T)
return InputInfo();
// If we've collapsed an action list that contained an OffloadAction, we
// need to build jobs for the host/device-side inputs it may have held.
for (const auto *OA : CollapsedOffloadActions)
cast<OffloadAction>(OA)->doOnEachDependence(
/*IsHostDependence=*/BuildForOffloadDevice,
[&](Action *DepA, const ToolChain *DepTC, const char *DepBoundArch) {
OffloadDependencesInputInfo.push_back(BuildJobsForAction(
C, DepA, DepTC, DepBoundArch, /* AtTopLevel */ false,
/*MultipleArchs=*/!!DepBoundArch, LinkingOutput, CachedResults,
/*BuildForOffloadDevice=*/DepA->getOffloadingDeviceKind() !=
Action::OFK_None));
});
// Only use pipes when there is exactly one input.
InputInfoList InputInfos;
for (const Action *Input : *Inputs) {
// Treat dsymutil and verify sub-jobs as being at the top-level too, they
// shouldn't get temporary output names.
// FIXME: Clean this up.
bool SubJobAtTopLevel =
AtTopLevel && (isa<DsymutilJobAction>(A) || isa<VerifyJobAction>(A));
InputInfos.push_back(BuildJobsForAction(
C, Input, TC, BoundArch, SubJobAtTopLevel, MultipleArchs, LinkingOutput,
CachedResults, BuildForOffloadDevice));
}
// Always use the first input as the base input.
const char *BaseInput = InputInfos[0].getBaseInput();
// ... except dsymutil actions, which use their actual input as the base
// input.
if (JA->getType() == types::TY_dSYM)
BaseInput = InputInfos[0].getFilename();
// Append outputs of offload device jobs to the input list
if (!OffloadDependencesInputInfo.empty())
InputInfos.append(OffloadDependencesInputInfo.begin(),
OffloadDependencesInputInfo.end());
// Set the effective triple of the toolchain for the duration of this job.
llvm::Triple EffectiveTriple;
const ToolChain &ToolTC = T->getToolChain();
const ArgList &Args = C.getArgsForToolChain(TC, BoundArch);
if (InputInfos.size() != 1) {
EffectiveTriple = llvm::Triple(ToolTC.ComputeEffectiveClangTriple(Args));
} else {
// Pass along the input type if it can be unambiguously determined.
EffectiveTriple = llvm::Triple(
ToolTC.ComputeEffectiveClangTriple(Args, InputInfos[0].getType()));
}
RegisterEffectiveTriple TripleRAII(ToolTC, EffectiveTriple);
// Determine the place to write output to, if any.
InputInfo Result;
if (JA->getType() == types::TY_Nothing)
Result = InputInfo(A, BaseInput);
else
Result = InputInfo(A, GetNamedOutputPath(C, *JA, BaseInput, BoundArch,
AtTopLevel, MultipleArchs,
TC->getTriple().normalize()),
BaseInput);
if (CCCPrintBindings && !CCGenDiagnostics) {
llvm::errs() << "# \"" << T->getToolChain().getTripleString() << '"'
<< " - \"" << T->getName() << "\", inputs: [";
for (unsigned i = 0, e = InputInfos.size(); i != e; ++i) {
llvm::errs() << InputInfos[i].getAsString();
if (i + 1 != e)
llvm::errs() << ", ";
}
llvm::errs() << "], output: " << Result.getAsString() << "\n";
} else {
T->ConstructJob(C, *JA, Result, InputInfos,
C.getArgsForToolChain(TC, BoundArch), LinkingOutput);
}
return Result;
}
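/// \brief Name used for the linked image when no explicit output is
/// requested: "a.exe" on Windows targets, "a.out" elsewhere.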
const char *Driver::getDefaultImageName() const {
llvm::Triple Target(llvm::Triple::normalize(DefaultTargetTriple));
return Target.isOSWindows() ? "a.exe" : "a.out";
}
/// \brief Create output filename based on ArgValue, which could either be a
/// full filename, filename without extension, or a directory. If ArgValue
/// does not provide a filename, then use BaseName, and use the extension
/// suitable for FileType.
static const char *MakeCLOutputFilename(const ArgList &Args, StringRef ArgValue,
StringRef BaseName,
types::ID FileType) {
SmallString<128> Filename = ArgValue;
if (ArgValue.empty()) {
// If the argument is empty, output to BaseName in the current dir.
Filename = BaseName;
} else if (llvm::sys::path::is_separator(Filename.back())) {
// If the argument is a directory, output to BaseName in that dir.
llvm::sys::path::append(Filename, BaseName);
}
if (!llvm::sys::path::has_extension(ArgValue)) {
// If the argument didn't provide an extension, then set it.
const char *Extension = types::getTypeTempSuffix(FileType, true);
if (FileType == types::TY_Image &&
Args.hasArg(options::OPT__SLASH_LD, options::OPT__SLASH_LDd)) {
// The output file is a dll.
Extension = "dll";
}
llvm::sys::path::replace_extension(Filename, Extension);
}
return Args.MakeArgString(Filename.c_str());
}
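/// \brief Compute the output path for \p JA: a user-specified -o or clang-cl
/// output flag takes precedence; otherwise the job may write to stdout, to a
/// temporary file, or to a name derived from \p BaseInput, the offloading
/// file name prefix and the bound architecture.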
const char *Driver::GetNamedOutputPath(Compilation &C, const JobAction &JA,
const char *BaseInput,
StringRef BoundArch, bool AtTopLevel,
bool MultipleArchs,
StringRef NormalizedTriple) const {
llvm::PrettyStackTraceString CrashInfo("Computing output path");
// Output to a user requested destination?
if (AtTopLevel && !isa<DsymutilJobAction>(JA) && !isa<VerifyJobAction>(JA)) {
if (Arg *FinalOutput = C.getArgs().getLastArg(options::OPT_o))
return C.addResultFile(FinalOutput->getValue(), &JA);
}
// For /P, preprocess to file named after BaseInput.
if (C.getArgs().hasArg(options::OPT__SLASH_P)) {
assert(AtTopLevel && isa<PreprocessJobAction>(JA));
StringRef BaseName = llvm::sys::path::filename(BaseInput);
StringRef NameArg;
if (Arg *A = C.getArgs().getLastArg(options::OPT__SLASH_Fi))
NameArg = A->getValue();
return C.addResultFile(
MakeCLOutputFilename(C.getArgs(), NameArg, BaseName, types::TY_PP_C),
&JA);
}
// Default to writing to stdout?
if (AtTopLevel && !CCGenDiagnostics &&
(isa<PreprocessJobAction>(JA) || JA.getType() == types::TY_ModuleFile))
return "-";
// Is this the assembly listing for /FA?
if (JA.getType() == types::TY_PP_Asm &&
(C.getArgs().hasArg(options::OPT__SLASH_FA) ||
C.getArgs().hasArg(options::OPT__SLASH_Fa))) {
// Use /Fa and the input filename to determine the asm file name.
StringRef BaseName = llvm::sys::path::filename(BaseInput);
StringRef FaValue = C.getArgs().getLastArgValue(options::OPT__SLASH_Fa);
return C.addResultFile(
MakeCLOutputFilename(C.getArgs(), FaValue, BaseName, JA.getType()),
&JA);
}
// Output to a temporary file?
if ((!AtTopLevel && !isSaveTempsEnabled() &&
!C.getArgs().hasArg(options::OPT__SLASH_Fo)) ||
CCGenDiagnostics) {
StringRef Name = llvm::sys::path::filename(BaseInput);
std::pair<StringRef, StringRef> Split = Name.split('.');
std::string TmpName = GetTemporaryPath(
Split.first, types::getTypeTempSuffix(JA.getType(), IsCLMode()));
return C.addTempFile(C.getArgs().MakeArgString(TmpName.c_str()));
}
SmallString<128> BasePath(BaseInput);
StringRef BaseName;
// Dsymutil actions should use the full path.
if (isa<DsymutilJobAction>(JA) || isa<VerifyJobAction>(JA))
BaseName = BasePath;
else
BaseName = llvm::sys::path::filename(BasePath);
// Determine what the derived output name should be.
const char *NamedOutput;
if ((JA.getType() == types::TY_Object || JA.getType() == types::TY_LTO_BC) &&
C.getArgs().hasArg(options::OPT__SLASH_Fo, options::OPT__SLASH_o)) {
// The /Fo or /o flag decides the object filename.
StringRef Val =
C.getArgs()
.getLastArg(options::OPT__SLASH_Fo, options::OPT__SLASH_o)
->getValue();
NamedOutput =
MakeCLOutputFilename(C.getArgs(), Val, BaseName, types::TY_Object);
} else if (JA.getType() == types::TY_Image &&
C.getArgs().hasArg(options::OPT__SLASH_Fe,
options::OPT__SLASH_o)) {
// The /Fe or /o flag names the linked file.
StringRef Val =
C.getArgs()
.getLastArg(options::OPT__SLASH_Fe, options::OPT__SLASH_o)
->getValue();
NamedOutput =
MakeCLOutputFilename(C.getArgs(), Val, BaseName, types::TY_Image);
} else if (JA.getType() == types::TY_Image) {
if (IsCLMode()) {
// clang-cl uses BaseName for the executable name.
NamedOutput =
MakeCLOutputFilename(C.getArgs(), "", BaseName, types::TY_Image);
} else if (MultipleArchs && !BoundArch.empty()) {
SmallString<128> Output(getDefaultImageName());
Output += JA.getOffloadingFileNamePrefix(NormalizedTriple);
Output += "-";
Output.append(BoundArch);
NamedOutput = C.getArgs().MakeArgString(Output.c_str());
} else {
NamedOutput = getDefaultImageName();
}
} else if (JA.getType() == types::TY_PCH && IsCLMode()) {
NamedOutput = C.getArgs().MakeArgString(GetClPchPath(C, BaseName).c_str());
} else {
const char *Suffix = types::getTypeTempSuffix(JA.getType(), IsCLMode());
assert(Suffix && "All types used for output should have a suffix.");
std::string::size_type End = std::string::npos;
if (!types::appendSuffixForType(JA.getType()))
End = BaseName.rfind('.');
SmallString<128> Suffixed(BaseName.substr(0, End));
Suffixed += JA.getOffloadingFileNamePrefix(NormalizedTriple);
if (MultipleArchs && !BoundArch.empty()) {
Suffixed += "-";
Suffixed.append(BoundArch);
}
// When using both -save-temps and -emit-llvm, use a ".tmp.bc" suffix for
// the unoptimized bitcode so that it does not get overwritten by the ".bc"
// optimized bitcode output.
if (!AtTopLevel && C.getArgs().hasArg(options::OPT_emit_llvm) &&
JA.getType() == types::TY_LLVM_BC)
Suffixed += ".tmp";
Suffixed += '.';
Suffixed += Suffix;
NamedOutput = C.getArgs().MakeArgString(Suffixed.c_str());
}
// Prepend object file path if -save-temps=obj
if (!AtTopLevel && isSaveTempsObj() && C.getArgs().hasArg(options::OPT_o) &&
JA.getType() != types::TY_PCH) {
Arg *FinalOutput = C.getArgs().getLastArg(options::OPT_o);
SmallString<128> TempPath(FinalOutput->getValue());
llvm::sys::path::remove_filename(TempPath);
StringRef OutputFileName = llvm::sys::path::filename(NamedOutput);
llvm::sys::path::append(TempPath, OutputFileName);
NamedOutput = C.getArgs().MakeArgString(TempPath.c_str());
}
// If we're saving temps and the temp file conflicts with the input file,
// then avoid overwriting the input file.
if (!AtTopLevel && isSaveTempsEnabled() && NamedOutput == BaseName) {
bool SameFile = false;
SmallString<256> Result;
llvm::sys::fs::current_path(Result);
llvm::sys::path::append(Result, BaseName);
llvm::sys::fs::equivalent(BaseInput, Result.c_str(), SameFile);
// Must share the same path to conflict.
if (SameFile) {
StringRef Name = llvm::sys::path::filename(BaseInput);
std::pair<StringRef, StringRef> Split = Name.split('.');
std::string TmpName = GetTemporaryPath(
Split.first, types::getTypeTempSuffix(JA.getType(), IsCLMode()));
return C.addTempFile(C.getArgs().MakeArgString(TmpName.c_str()));
}
}
// As an annoying special case, PCH generation doesn't strip the pathname.
if (JA.getType() == types::TY_PCH && !IsCLMode()) {
llvm::sys::path::remove_filename(BasePath);
if (BasePath.empty())
BasePath = NamedOutput;
else
llvm::sys::path::append(BasePath, NamedOutput);
return C.addResultFile(C.getArgs().MakeArgString(BasePath.c_str()), &JA);
} else {
return C.addResultFile(NamedOutput, &JA);
}
}
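/// \brief Search for the file \p Name in the -B prefix directories, the
/// resource directory and the toolchain's file search paths, returning
/// \p Name unchanged if it is not found.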
std::string Driver::GetFilePath(StringRef Name, const ToolChain &TC) const {
// Respect a limited subset of the '-Bprefix' functionality in GCC by
// attempting to use this prefix when looking for file paths.
for (const std::string &Dir : PrefixDirs) {
if (Dir.empty())
continue;
SmallString<128> P(Dir[0] == '=' ? SysRoot + Dir.substr(1) : Dir);
llvm::sys::path::append(P, Name);
if (llvm::sys::fs::exists(Twine(P)))
return P.str();
}
SmallString<128> P(ResourceDir);
llvm::sys::path::append(P, Name);
if (llvm::sys::fs::exists(Twine(P)))
return P.str();
for (const std::string &Dir : TC.getFilePaths()) {
if (Dir.empty())
continue;
SmallString<128> P(Dir[0] == '=' ? SysRoot + Dir.substr(1) : Dir);
llvm::sys::path::append(P, Name);
if (llvm::sys::fs::exists(Twine(P)))
return P.str();
}
return Name;
}
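/// \brief Collect the candidate executable names for \p Tool: the name
/// prefixed with the driver's default target triple, the plain name and,
/// when it differs, the name prefixed with LLVM's default target triple.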
void Driver::generatePrefixedToolNames(
StringRef Tool, const ToolChain &TC,
SmallVectorImpl<std::string> &Names) const {
// FIXME: Needs a better variable than DefaultTargetTriple
Names.emplace_back((DefaultTargetTriple + "-" + Tool).str());
Names.emplace_back(Tool);
// Allow the discovery of tools prefixed with LLVM's default target triple.
std::string LLVMDefaultTargetTriple = llvm::sys::getDefaultTargetTriple();
if (LLVMDefaultTargetTriple != DefaultTargetTriple)
Names.emplace_back((LLVMDefaultTargetTriple + "-" + Tool).str());
}
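/// \brief Check \p Dir for the first of \p Names that can be executed; on
/// success \p Dir is left holding the full path to that executable.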
static bool ScanDirForExecutable(SmallString<128> &Dir,
ArrayRef<std::string> Names) {
for (const auto &Name : Names) {
llvm::sys::path::append(Dir, Name);
if (llvm::sys::fs::can_execute(Twine(Dir)))
return true;
llvm::sys::path::remove_filename(Dir);
}
return false;
}
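/// \brief Locate the program \p Name (including its target-prefixed
/// variants) in the -B prefix directories, the toolchain's program paths
/// and finally the system PATH, returning \p Name unchanged if nothing is
/// found.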
std::string Driver::GetProgramPath(StringRef Name, const ToolChain &TC) const {
SmallVector<std::string, 2> TargetSpecificExecutables;
generatePrefixedToolNames(Name, TC, TargetSpecificExecutables);
// Respect a limited subset of the '-Bprefix' functionality in GCC by
// attempting to use this prefix when looking for program paths.
for (const auto &PrefixDir : PrefixDirs) {
if (llvm::sys::fs::is_directory(PrefixDir)) {
SmallString<128> P(PrefixDir);
if (ScanDirForExecutable(P, TargetSpecificExecutables))
return P.str();
} else {
SmallString<128> P((PrefixDir + Name).str());
if (llvm::sys::fs::can_execute(Twine(P)))
return P.str();
}
}
const ToolChain::path_list &List = TC.getProgramPaths();
for (const auto &Path : List) {
SmallString<128> P(Path);
if (ScanDirForExecutable(P, TargetSpecificExecutables))
return P.str();
}
// If all else failed, search the path.
for (const auto &TargetSpecificExecutable : TargetSpecificExecutables)
if (llvm::ErrorOr<std::string> P =
llvm::sys::findProgramByName(TargetSpecificExecutable))
return *P;
return Name;
}
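/// \brief Create a temporary file named from \p Prefix and \p Suffix and
/// return its path; on failure a diagnostic is emitted and an empty string
/// is returned.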
std::string Driver::GetTemporaryPath(StringRef Prefix, StringRef Suffix) const {
SmallString<128> Path;
std::error_code EC = llvm::sys::fs::createTemporaryFile(Prefix, Suffix, Path);
if (EC) {
Diag(clang::diag::err_unable_to_make_temp) << EC.message();
return "";
}
return Path.str();
}
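
// Illustrative use of GetTemporaryPath (a sketch only; callers typically
// register the result with Compilation::addTempFile so it gets cleaned up):
//
//   std::string Obj = GetTemporaryPath("foo", "o");
//   // e.g. "/tmp/foo-1a2b3c.o" on a typical Unix host, or "" on failure.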
std::string Driver::GetClPchPath(Compilation &C, StringRef BaseName) const {
SmallString<128> Output;
if (Arg *FpArg = C.getArgs().getLastArg(options::OPT__SLASH_Fp)) {
// FIXME: If anybody needs it, implement this obscure rule:
// "If you specify a directory without a file name, the default file name
// is VCx0.pch., where x is the major version of Visual C++ in use."
Output = FpArg->getValue();
// "If you do not specify an extension as part of the path name, an
// extension of .pch is assumed. "
if (!llvm::sys::path::has_extension(Output))
Output += ".pch";
} else {
Output = BaseName;
llvm::sys::path::replace_extension(Output, ".pch");
}
return Output.str();
}
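
// Illustrative results for GetClPchPath (a sketch; the header names are made
// up):
//
//   /Fpmy_pch   with BaseName "stdafx.h"  ->  "my_pch.pch"
//   /Fpout.pch  with BaseName "stdafx.h"  ->  "out.pch"
//   no /Fp      with BaseName "stdafx.h"  ->  "stdafx.pch"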
const ToolChain &Driver::getToolChain(const ArgList &Args,
const llvm::Triple &Target) const {
ToolChain *&TC = ToolChains[Target.str()];
if (!TC) {
switch (Target.getOS()) {
case llvm::Triple::Haiku:
TC = new toolchains::Haiku(*this, Target, Args);
break;
case llvm::Triple::CloudABI:
TC = new toolchains::CloudABI(*this, Target, Args);
break;
case llvm::Triple::Darwin:
case llvm::Triple::MacOSX:
case llvm::Triple::IOS:
case llvm::Triple::TvOS:
case llvm::Triple::WatchOS:
TC = new toolchains::DarwinClang(*this, Target, Args);
break;
case llvm::Triple::DragonFly:
TC = new toolchains::DragonFly(*this, Target, Args);
break;
case llvm::Triple::OpenBSD:
TC = new toolchains::OpenBSD(*this, Target, Args);
break;
case llvm::Triple::Bitrig:
TC = new toolchains::Bitrig(*this, Target, Args);
break;
case llvm::Triple::NetBSD:
TC = new toolchains::NetBSD(*this, Target, Args);
break;
case llvm::Triple::FreeBSD:
TC = new toolchains::FreeBSD(*this, Target, Args);
break;
case llvm::Triple::Minix:
TC = new toolchains::Minix(*this, Target, Args);
break;
case llvm::Triple::Linux:
case llvm::Triple::ELFIAMCU:
if (Target.getArch() == llvm::Triple::hexagon)
TC = new toolchains::HexagonToolChain(*this, Target, Args);
else if ((Target.getVendor() == llvm::Triple::MipsTechnologies) &&
!Target.hasEnvironment())
TC = new toolchains::MipsLLVMToolChain(*this, Target, Args);
else
TC = new toolchains::Linux(*this, Target, Args);
break;
case llvm::Triple::NaCl:
TC = new toolchains::NaClToolChain(*this, Target, Args);
break;
case llvm::Triple::Fuchsia:
TC = new toolchains::Fuchsia(*this, Target, Args);
break;
case llvm::Triple::Solaris:
TC = new toolchains::Solaris(*this, Target, Args);
break;
case llvm::Triple::AMDHSA:
TC = new toolchains::AMDGPUToolChain(*this, Target, Args);
break;
case llvm::Triple::Win32:
switch (Target.getEnvironment()) {
default:
if (Target.isOSBinFormatELF())
TC = new toolchains::Generic_ELF(*this, Target, Args);
else if (Target.isOSBinFormatMachO())
TC = new toolchains::MachO(*this, Target, Args);
else
TC = new toolchains::Generic_GCC(*this, Target, Args);
break;
case llvm::Triple::GNU:
TC = new toolchains::MinGW(*this, Target, Args);
break;
case llvm::Triple::Itanium:
TC = new toolchains::CrossWindowsToolChain(*this, Target, Args);
break;
case llvm::Triple::MSVC:
case llvm::Triple::UnknownEnvironment:
TC = new toolchains::MSVCToolChain(*this, Target, Args);
break;
}
break;
case llvm::Triple::CUDA:
TC = new toolchains::CudaToolChain(*this, Target, Args);
break;
case llvm::Triple::PS4:
TC = new toolchains::PS4CPU(*this, Target, Args);
break;
default:
// Of these targets, Hexagon is the only one that might have
// an OS of Linux, in which case it got handled above already.
switch (Target.getArch()) {
case llvm::Triple::tce:
TC = new toolchains::TCEToolChain(*this, Target, Args);
break;
case llvm::Triple::hexagon:
TC = new toolchains::HexagonToolChain(*this, Target, Args);
break;
case llvm::Triple::lanai:
TC = new toolchains::LanaiToolChain(*this, Target, Args);
break;
case llvm::Triple::xcore:
TC = new toolchains::XCoreToolChain(*this, Target, Args);
break;
case llvm::Triple::wasm32:
case llvm::Triple::wasm64:
TC = new toolchains::WebAssembly(*this, Target, Args);
break;
default:
if (Target.getVendor() == llvm::Triple::Myriad)
TC = new toolchains::MyriadToolChain(*this, Target, Args);
else if (Target.isOSBinFormatELF())
TC = new toolchains::Generic_ELF(*this, Target, Args);
else if (Target.isOSBinFormatMachO())
TC = new toolchains::MachO(*this, Target, Args);
else
TC = new toolchains::Generic_GCC(*this, Target, Args);
}
}
}
return *TC;
}
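
// Illustrative use of getToolChain (a sketch; D and Args are hypothetical).
// Tool chains are cached per target triple string, so repeated lookups for
// the same target return the same object:
//
//   llvm::Triple T("x86_64-unknown-linux-gnu");
//   const ToolChain &TC1 = D.getToolChain(Args, T); // builds toolchains::Linux
//   const ToolChain &TC2 = D.getToolChain(Args, T); // hits the cache
//   assert(&TC1 == &TC2);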
bool Driver::ShouldUseClangCompiler(const JobAction &JA) const {
// Say "no" if there is not exactly one input of a type clang understands.
if (JA.size() != 1 ||
!types::isAcceptedByClang((*JA.input_begin())->getType()))
return false;
// And say "no" if this is not a kind of action clang understands.
if (!isa<PreprocessJobAction>(JA) && !isa<PrecompileJobAction>(JA) &&
!isa<CompileJobAction>(JA) && !isa<BackendJobAction>(JA))
return false;
return true;
}
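
// Illustrative outcomes for ShouldUseClangCompiler (a sketch):
//
//   isa<CompileJobAction>(JA) with one input of types::TY_CXX  ->  true
//   isa<LinkJobAction>(JA)                                     ->  false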
/// GetReleaseVersion - Parse (([0-9]+)(.([0-9]+)(.([0-9]+)?))?)? and return the
/// grouped values as integers. Numbers which are not provided are set to 0.
///
/// \return True if the entire string was parsed (9.2), or all groups were
/// parsed (10.3.5extrastuff).
bool Driver::GetReleaseVersion(StringRef Str, unsigned &Major, unsigned &Minor,
unsigned &Micro, bool &HadExtra) {
HadExtra = false;
Major = Minor = Micro = 0;
if (Str.empty())
return false;
if (Str.consumeInteger(10, Major))
return false;
if (Str.empty())
return true;
if (Str[0] != '.')
return false;
Str = Str.drop_front(1);
if (Str.consumeInteger(10, Minor))
return false;
if (Str.empty())
return true;
if (Str[0] != '.')
return false;
Str = Str.drop_front(1);
if (Str.consumeInteger(10, Micro))
return false;
if (!Str.empty())
HadExtra = true;
return true;
}
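
// Illustrative results for the three-component GetReleaseVersion overload
// (a sketch):
//
//   unsigned Major, Minor, Micro; bool HadExtra;
//   Driver::GetReleaseVersion("10.3.5extrastuff", Major, Minor, Micro, HadExtra);
//     // true; Major=10, Minor=3, Micro=5, HadExtra=true
//   Driver::GetReleaseVersion("9.2", Major, Minor, Micro, HadExtra);
//     // true; Major=9, Minor=2, Micro=0, HadExtra=false
//   Driver::GetReleaseVersion("9.x", Major, Minor, Micro, HadExtra);
//     // false; the minor component is not a number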
/// Parse digits from a string \p Str and fulfill \p Digits with
/// the parsed numbers. This method assumes that the max number of
/// digits to look for is equal to Digits.size().
///
/// \return True if the entire string was parsed and there are
/// no extra characters remaining at the end.
bool Driver::GetReleaseVersion(StringRef Str,
MutableArrayRef<unsigned> Digits) {
if (Str.empty())
return false;
unsigned CurDigit = 0;
while (CurDigit < Digits.size()) {
unsigned Digit;
if (Str.consumeInteger(10, Digit))
return false;
Digits[CurDigit] = Digit;
if (Str.empty())
return true;
if (Str[0] != '.')
return false;
Str = Str.drop_front(1);
CurDigit++;
}
// More digits than requested, bail out...
return false;
}
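
// Illustrative results for the array-based GetReleaseVersion overload
// (a sketch; the caller chooses how many components to request):
//
//   unsigned Parts[4] = {};
//   Driver::GetReleaseVersion("19.00.24215.1", Parts); // true; {19, 0, 24215, 1}
//   Driver::GetReleaseVersion("19.00", Parts);         // true; trailing parts stay 0
//   Driver::GetReleaseVersion("1.2.3.4.5", Parts);     // false; more than four parts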
std::pair<unsigned, unsigned> Driver::getIncludeExcludeOptionFlagMasks() const {
unsigned IncludedFlagsBitmask = 0;
unsigned ExcludedFlagsBitmask = options::NoDriverOption;
if (Mode == CLMode) {
// Include CL and Core options.
IncludedFlagsBitmask |= options::CLOption;
IncludedFlagsBitmask |= options::CoreOption;
} else {
ExcludedFlagsBitmask |= options::CLOption;
}
return std::make_pair(IncludedFlagsBitmask, ExcludedFlagsBitmask);
}
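
// Illustrative use of the returned masks (a sketch of how they can be fed to
// llvm::opt::OptTable::ParseArgs; ArgStrings is hypothetical):
//
//   unsigned IncludedFlagsBitmask, ExcludedFlagsBitmask;
//   std::tie(IncludedFlagsBitmask, ExcludedFlagsBitmask) =
//       getIncludeExcludeOptionFlagMasks();
//   unsigned MissingArgIndex = 0, MissingArgCount = 0;
//   llvm::opt::InputArgList Args =
//       getOpts().ParseArgs(ArgStrings, MissingArgIndex, MissingArgCount,
//                           IncludedFlagsBitmask, ExcludedFlagsBitmask);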
bool clang::driver::isOptimizationLevelFast(const ArgList &Args) {
return Args.hasFlag(options::OPT_Ofast, options::OPT_O_Group, false);
}
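
// Illustrative behaviour of isOptimizationLevelFast (a sketch): hasFlag keys
// off the last -O group option on the command line.
//
//   clang -Ofast foo.c       ->  true
//   clang -Ofast -O2 foo.c   ->  false (-O2 is the last -O group option)
//   clang -O3 foo.c          ->  false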