forked from OSchip/llvm-project
[hip] Properly populate macros based on host processor.
Summary: - The device compilation needs to see source code consistent with the corresponding host compilation. If macros based on the host-specific target processor are not properly populated, the device compilation may fail due to the inconsistent source after the preprocessor. So far, only the host triple is used to build the macros. If a detailed host CPU target or certain features are specified, macros derived from them won't be populated properly, e.g. `__SSE3__` won't be added unless the `+sse3` feature is present. On Windows compilation compatible with MSVC, those missing macros result in intrinsics not being included and cause device compilation failure on the host-side source. - This patch addresses this issue by introducing two `cc1` options, i.e., `-aux-target-cpu` and `-aux-target-feature`. If a specific host CPU target or certain features are specified, the compiler driver will append them during the construction of the offline compilation actions. Then, the toolchain in the `cc1` phase will populate macros accordingly. - An internal option `--gpu-use-aux-triple-only` is added to fall back to the original behavior to help diagnose potential issues from the new behavior. Reviewers: tra, yaxunl Subscribers: cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D73942
This commit is contained in:
parent
cfacf9ae20
commit
ccac6b2bf8
|
@ -482,6 +482,10 @@ def cc1as : Flag<["-"], "cc1as">;
|
|||
def ast_merge : Separate<["-"], "ast-merge">,
|
||||
MetaVarName<"<ast file>">,
|
||||
HelpText<"Merge the given AST file into the translation unit being compiled.">;
|
||||
def aux_target_cpu : Separate<["-"], "aux-target-cpu">,
|
||||
HelpText<"Target a specific auxiliary cpu type">;
|
||||
def aux_target_feature : Separate<["-"], "aux-target-feature">,
|
||||
HelpText<"Target specific auxiliary attributes">;
|
||||
def aux_triple : Separate<["-"], "aux-triple">,
|
||||
HelpText<"Auxiliary target triple.">;
|
||||
def code_completion_at : Separate<["-"], "code-completion-at">,
|
||||
|
|
|
@ -549,6 +549,9 @@ def c : Flag<["-"], "c">, Flags<[DriverOption]>, Group<Action_Group>,
|
|||
def fconvergent_functions : Flag<["-"], "fconvergent-functions">, Group<f_Group>, Flags<[CC1Option]>,
|
||||
HelpText<"Assume functions may be convergent">;
|
||||
|
||||
def gpu_use_aux_triple_only : Flag<["--"], "gpu-use-aux-triple-only">,
|
||||
InternalDriverOpt, HelpText<"Prepare '-aux-triple' only without populating "
|
||||
"'-aux-target-cpu' and '-aux-target-feature'.">;
|
||||
def cuda_device_only : Flag<["--"], "cuda-device-only">,
|
||||
HelpText<"Compile CUDA code for device only">;
|
||||
def cuda_host_only : Flag<["--"], "cuda-host-only">,
|
||||
|
|
|
@ -426,9 +426,15 @@ public:
|
|||
/// (in the format produced by -fdump-record-layouts).
|
||||
std::string OverrideRecordLayoutsFile;
|
||||
|
||||
/// Auxiliary triple for CUDA compilation.
|
||||
/// Auxiliary triple for CUDA/HIP compilation.
|
||||
std::string AuxTriple;
|
||||
|
||||
/// Auxiliary target CPU for CUDA/HIP compilation.
|
||||
Optional<std::string> AuxTargetCPU;
|
||||
|
||||
/// Auxiliary target features for CUDA/HIP compilation.
|
||||
Optional<std::vector<std::string>> AuxTargetFeatures;
|
||||
|
||||
/// Filename to write statistics to.
|
||||
std::string StatsFile;
|
||||
|
||||
|
|
|
@ -309,7 +309,7 @@ static void getWebAssemblyTargetFeatures(const ArgList &Args,
|
|||
|
||||
static void getTargetFeatures(const ToolChain &TC, const llvm::Triple &Triple,
|
||||
const ArgList &Args, ArgStringList &CmdArgs,
|
||||
bool ForAS) {
|
||||
bool ForAS, bool IsAux = false) {
|
||||
const Driver &D = TC.getDriver();
|
||||
std::vector<StringRef> Features;
|
||||
switch (Triple.getArch()) {
|
||||
|
@ -387,7 +387,7 @@ static void getTargetFeatures(const ToolChain &TC, const llvm::Triple &Triple,
|
|||
if (Last != I)
|
||||
continue;
|
||||
|
||||
CmdArgs.push_back("-target-feature");
|
||||
CmdArgs.push_back(IsAux ? "-aux-target-feature" : "-target-feature");
|
||||
CmdArgs.push_back(Name.data());
|
||||
}
|
||||
}
|
||||
|
@ -4627,6 +4627,23 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
|
|||
AsynchronousUnwindTables))
|
||||
CmdArgs.push_back("-munwind-tables");
|
||||
|
||||
// Prepare `-aux-target-cpu` and `-aux-target-feature` unless
|
||||
// `--gpu-use-aux-triple-only` is specified.
|
||||
if (!Args.getLastArg(options::OPT_gpu_use_aux_triple_only) &&
|
||||
((IsCuda && JA.isDeviceOffloading(Action::OFK_Cuda)) ||
|
||||
(IsHIP && JA.isDeviceOffloading(Action::OFK_HIP)))) {
|
||||
const ArgList &HostArgs =
|
||||
C.getArgsForToolChain(nullptr, StringRef(), Action::OFK_None);
|
||||
std::string HostCPU =
|
||||
getCPUName(HostArgs, *TC.getAuxTriple(), /*FromAs*/ false);
|
||||
if (!HostCPU.empty()) {
|
||||
CmdArgs.push_back("-aux-target-cpu");
|
||||
CmdArgs.push_back(Args.MakeArgString(HostCPU));
|
||||
}
|
||||
getTargetFeatures(TC, *TC.getAuxTriple(), HostArgs, CmdArgs,
|
||||
/*ForAS*/ false, /*IsAux*/ true);
|
||||
}
|
||||
|
||||
TC.addClangTargetOptions(Args, CmdArgs, JA.getOffloadingDeviceKind());
|
||||
|
||||
// FIXME: Handle -mtune=.
|
||||
|
|
|
@ -923,6 +923,10 @@ bool CompilerInstance::ExecuteAction(FrontendAction &Act) {
|
|||
!getFrontendOpts().AuxTriple.empty()) {
|
||||
auto TO = std::make_shared<TargetOptions>();
|
||||
TO->Triple = llvm::Triple::normalize(getFrontendOpts().AuxTriple);
|
||||
if (getFrontendOpts().AuxTargetCPU)
|
||||
TO->CPU = getFrontendOpts().AuxTargetCPU.getValue();
|
||||
if (getFrontendOpts().AuxTargetFeatures)
|
||||
TO->FeaturesAsWritten = getFrontendOpts().AuxTargetFeatures.getValue();
|
||||
TO->HostTriple = getTarget().getTriple().str();
|
||||
setAuxTarget(TargetInfo::CreateTargetInfo(getDiagnostics(), TO));
|
||||
}
|
||||
|
|
|
@ -1931,6 +1931,10 @@ static InputKind ParseFrontendArgs(FrontendOptions &Opts, ArgList &Args,
|
|||
Opts.OverrideRecordLayoutsFile =
|
||||
std::string(Args.getLastArgValue(OPT_foverride_record_layout_EQ));
|
||||
Opts.AuxTriple = std::string(Args.getLastArgValue(OPT_aux_triple));
|
||||
if (Args.hasArg(OPT_aux_target_cpu))
|
||||
Opts.AuxTargetCPU = std::string(Args.getLastArgValue(OPT_aux_target_cpu));
|
||||
if (Args.hasArg(OPT_aux_target_feature))
|
||||
Opts.AuxTargetFeatures = Args.getAllArgValues(OPT_aux_target_feature);
|
||||
Opts.StatsFile = std::string(Args.getLastArgValue(OPT_stats_file));
|
||||
|
||||
if (const Arg *A = Args.getLastArg(OPT_arcmt_check,
|
||||
|
|
|
@ -0,0 +1,19 @@
|
|||
// REQUIRES: clang-driver
|
||||
// REQUIRES: x86-registered-target
|
||||
// REQUIRES: amdgpu-registered-target
|
||||
|
||||
// RUN: %clang -### -c -target x86_64-linux-gnu -march=znver2 -x hip --cuda-gpu-arch=gfx803 -nogpulib %s 2>&1 | FileCheck %s -check-prefix=HOSTCPU
|
||||
// RUN: %clang -### -c -target x86_64-linux-gnu -msse3 -x hip --cuda-gpu-arch=gfx803 -nogpulib %s 2>&1 | FileCheck %s -check-prefix=HOSTSSE3
|
||||
// RUN: %clang -### -c -target x86_64-linux-gnu --gpu-use-aux-triple-only -march=znver2 -x hip --cuda-gpu-arch=gfx803 -nogpulib %s 2>&1 | FileCheck %s -check-prefix=NOHOSTCPU
|
||||
|
||||
// HOSTCPU: "-cc1" "-triple" "amdgcn-amd-amdhsa"
|
||||
// HOSTCPU-SAME: "-aux-triple" "x86_64-unknown-linux-gnu"
|
||||
// HOSTCPU-SAME: "-aux-target-cpu" "znver2"
|
||||
|
||||
// HOSTSSE3: "-cc1" "-triple" "amdgcn-amd-amdhsa"
|
||||
// HOSTSSE3-SAME: "-aux-triple" "x86_64-unknown-linux-gnu"
|
||||
// HOSTSSE3-SAME: "-aux-target-feature" "+sse3"
|
||||
|
||||
// NOHOSTCPU: "-cc1" "-triple" "amdgcn-amd-amdhsa"
|
||||
// NOHOSTCPU-SAME: "-aux-triple" "x86_64-unknown-linux-gnu"
|
||||
// NOHOSTCPU-NOT: "-aux-target-cpu" "znver2"
|
|
@ -0,0 +1,13 @@
|
|||
// REQUIRES: clang-driver
|
||||
// REQUIRES: x86-registered-target
|
||||
// REQUIRES: amdgpu-registered-target
|
||||
|
||||
#ifdef __HIP_DEVICE_COMPILE__
|
||||
DEVICE __SSE3__
|
||||
#else
|
||||
HOST __SSE3__
|
||||
#endif
|
||||
|
||||
// RUN: %clang -x hip -E -target x86_64-linux-gnu -msse3 --cuda-gpu-arch=gfx803 -nogpulib -o - %s 2>&1 | FileCheck %s
|
||||
|
||||
// CHECK-NOT: SSE3
|
Loading…
Reference in New Issue