[CUDA][HIP] Add -Xarch_device and -Xarch_host options

The argument after -Xarch_device will be added to the arguments for CUDA/HIP
device compilation and will be removed for host compilation.

The argument after -Xarch_host will be added to the arguments for CUDA/HIP
host compilation and will be removed for device compilation.

Differential Revision: https://reviews.llvm.org/D76520
This commit is contained in:
Yaxun (Sam) Liu 2020-03-23 14:23:09 -04:00
parent d381b6a8d3
commit 2ae25647d1
7 changed files with 105 additions and 24 deletions

View File

@ -466,6 +466,10 @@ def Xanalyzer : Separate<["-"], "Xanalyzer">,
HelpText<"Pass <arg> to the static analyzer">, MetaVarName<"<arg>">,
Group<StaticAnalyzer_Group>;
def Xarch__ : JoinedAndSeparate<["-"], "Xarch_">, Flags<[DriverOption]>;
// -Xarch_host <arg>: driver option that takes a single, separate value <arg>
// and passes it to the CUDA/HIP host compilation.
def Xarch_host : Separate<["-"], "Xarch_host">, Flags<[DriverOption]>,
HelpText<"Pass <arg> to the CUDA/HIP host compilation">, MetaVarName<"<arg>">;
// -Xarch_device <arg>: driver option that takes a single, separate value <arg>
// and passes it to the CUDA/HIP device compilation.
def Xarch_device : Separate<["-"], "Xarch_device">, Flags<[DriverOption]>,
HelpText<"Pass <arg> to the CUDA/HIP device compilation">, MetaVarName<"<arg>">;
def Xassembler : Separate<["-"], "Xassembler">,
HelpText<"Pass <arg> to the assembler">, MetaVarName<"<arg>">,
Group<CompileOnly_Group>;

View File

@ -296,10 +296,20 @@ public:
SmallVectorImpl<llvm::opt::Arg *> &AllocatedArgs) const;
/// Append the argument following \p A to \p DAL assuming \p A is an Xarch
/// argument.
virtual void TranslateXarchArgs(const llvm::opt::DerivedArgList &Args,
llvm::opt::Arg *&A,
llvm::opt::DerivedArgList *DAL) const;
/// argument. If \p AllocatedArgs is a null pointer, synthesized arguments
/// are added to \p DAL, otherwise they are appended to \p AllocatedArgs.
/// \p A is taken by reference and is updated to point at the translated
/// argument.
virtual void TranslateXarchArgs(
const llvm::opt::DerivedArgList &Args, llvm::opt::Arg *&A,
llvm::opt::DerivedArgList *DAL,
SmallVectorImpl<llvm::opt::Arg *> *AllocatedArgs = nullptr) const;
/// Translate -Xarch_ arguments, including -Xarch_host and -Xarch_device.
/// If no argument had to be translated or dropped, return a null pointer,
/// otherwise return a newly allocated DerivedArgList containing the
/// translated arguments together with the arguments that were left alone.
///
/// In the default implementation, -Xarch_device arguments are translated
/// when \p DeviceOffloadKind is CUDA or HIP and dropped otherwise, while
/// -Xarch_host arguments are handled the opposite way. -Xarch_<arch>
/// arguments are only processed for CUDA/HIP offload kinds: they are
/// translated when <arch> equals \p BoundArch and dropped when it does not
/// (or when \p BoundArch is empty).
///
/// \p AllocatedArgs is forwarded to the single-argument overload, which
/// appends synthesized arguments to it when it is non-null.
virtual llvm::opt::DerivedArgList *
TranslateXarchArgs(const llvm::opt::DerivedArgList &Args, StringRef BoundArch,
Action::OffloadKind DeviceOffloadKind,
SmallVectorImpl<llvm::opt::Arg *> *AllocatedArgs) const;
/// Choose a tool to use to handle the action \p JA.
///

View File

@ -76,16 +76,29 @@ Compilation::getArgsForToolChain(const ToolChain *TC, StringRef BoundArch,
*TranslatedArgs, SameTripleAsHost, AllocatedArgs);
}
DerivedArgList *NewDAL = nullptr;
if (!OpenMPArgs) {
NewDAL = TC->TranslateXarchArgs(*TranslatedArgs, BoundArch,
DeviceOffloadKind, &AllocatedArgs);
} else {
NewDAL = TC->TranslateXarchArgs(*OpenMPArgs, BoundArch, DeviceOffloadKind,
&AllocatedArgs);
if (!NewDAL)
NewDAL = OpenMPArgs;
else
delete OpenMPArgs;
}
if (!NewDAL) {
Entry = TC->TranslateArgs(*TranslatedArgs, BoundArch, DeviceOffloadKind);
if (!Entry)
Entry = TranslatedArgs;
} else {
Entry = TC->TranslateArgs(*OpenMPArgs, BoundArch, DeviceOffloadKind);
Entry = TC->TranslateArgs(*NewDAL, BoundArch, DeviceOffloadKind);
if (!Entry)
Entry = OpenMPArgs;
Entry = NewDAL;
else
delete OpenMPArgs;
delete NewDAL;
}
// Add allocated arguments to the final DAL.

View File

@ -1103,11 +1103,20 @@ llvm::opt::DerivedArgList *ToolChain::TranslateOpenMPTargetArgs(
return nullptr;
}
void ToolChain::TranslateXarchArgs(const llvm::opt::DerivedArgList &Args,
llvm::opt::Arg *&A,
llvm::opt::DerivedArgList *DAL) const {
// TODO: Currently argument values separated by space e.g.
// -Xclang -mframe-pointer=no cannot be passed by -Xarch_. This should be
// fixed.
void ToolChain::TranslateXarchArgs(
const llvm::opt::DerivedArgList &Args, llvm::opt::Arg *&A,
llvm::opt::DerivedArgList *DAL,
SmallVectorImpl<llvm::opt::Arg *> *AllocatedArgs) const {
const OptTable &Opts = getDriver().getOpts();
unsigned Index = Args.getBaseArgs().MakeIndex(A->getValue(1));
unsigned ValuePos = 1;
if (A->getOption().matches(options::OPT_Xarch_device) ||
A->getOption().matches(options::OPT_Xarch_host))
ValuePos = 0;
unsigned Index = Args.getBaseArgs().MakeIndex(A->getValue(ValuePos));
unsigned Prev = Index;
std::unique_ptr<llvm::opt::Arg> XarchArg(Opts.ParseOneArg(Args, Index));
@ -1130,5 +1139,49 @@ void ToolChain::TranslateXarchArgs(const llvm::opt::DerivedArgList &Args,
}
XarchArg->setBaseArg(A);
A = XarchArg.release();
DAL->AddSynthesizedArg(A);
if (!AllocatedArgs)
DAL->AddSynthesizedArg(A);
else
AllocatedArgs->push_back(A);
}
/// Build a copy of \p Args in which the -Xarch_host, -Xarch_device and (for
/// CUDA/HIP only) -Xarch_<arch> arguments have either been replaced by the
/// option they carry or been removed, depending on \p OFK and \p BoundArch.
///
/// \returns a newly allocated list if at least one argument was translated or
/// removed, or a null pointer if \p Args needed no change.
llvm::opt::DerivedArgList *ToolChain::TranslateXarchArgs(
    const llvm::opt::DerivedArgList &Args, StringRef BoundArch,
    Action::OffloadKind OFK,
    SmallVectorImpl<llvm::opt::Arg *> *AllocatedArgs) const {
  const bool ForDevice = OFK == Action::OFK_Cuda || OFK == Action::OFK_HIP;

  // Owned locally so the list is destroyed automatically when nothing
  // changed; ownership is handed to the caller only on modification.
  auto Result = std::make_unique<DerivedArgList>(Args.getBaseArgs());
  bool Changed = false;

  for (Arg *Current : Args) {
    const auto &Opt = Current->getOption();
    bool Translate = false;
    bool Drop = false;

    if (Opt.matches(options::OPT_Xarch_device)) {
      // Device-only argument: keep (translated) for CUDA/HIP, drop otherwise.
      Translate = ForDevice;
      Drop = !ForDevice;
    } else if (Opt.matches(options::OPT_Xarch_host)) {
      // Host-only argument: the mirror image of -Xarch_device.
      Translate = !ForDevice;
      Drop = ForDevice;
    } else if (ForDevice && Opt.matches(options::OPT_Xarch__)) {
      // Do not translate -Xarch_ options for non CUDA/HIP toolchain since
      // they may need special translation.
      // The argument is only kept when its architecture matches BoundArch.
      const bool ArchMatches =
          !BoundArch.empty() && Current->getValue(0) == BoundArch;
      Translate = ArchMatches;
      Drop = !ArchMatches;
    }

    Changed |= Translate || Drop;

    // Translation rebinds Current to the synthesized argument.
    if (Translate)
      TranslateXarchArgs(Args, Current, Result.get(), AllocatedArgs);

    if (Drop)
      continue;
    Result->append(Current);
  }

  return Changed ? Result.release() : nullptr;
}

View File

@ -800,12 +800,6 @@ CudaToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
}
for (Arg *A : Args) {
if (A->getOption().matches(options::OPT_Xarch__)) {
// Skip this argument unless the architecture matches BoundArch
if (BoundArch.empty() || A->getValue(0) != BoundArch)
continue;
TranslateXarchArgs(Args, A, DAL);
}
DAL->append(A);
}

View File

@ -378,12 +378,6 @@ HIPToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
const OptTable &Opts = getDriver().getOpts();
for (Arg *A : Args) {
if (A->getOption().matches(options::OPT_Xarch__)) {
// Skip this argument unless the architecture matches BoundArch.
if (BoundArch.empty() || A->getValue(0) != BoundArch)
continue;
TranslateXarchArgs(Args, A, DAL);
}
DAL->append(A);
}

View File

@ -13,3 +13,16 @@
// RUN: -mllvm -amdgpu-early-inline-all=true %s 2>&1 | \
// RUN: FileCheck -check-prefix=MLLVM %s
// MLLVM-NOT: "-mllvm"{{.*}}"-amdgpu-early-inline-all=true"{{.*}}"-mllvm"{{.*}}"-amdgpu-early-inline-all=true"
// RUN: %clang -### -Xarch_device -g -nogpulib --cuda-gpu-arch=gfx900 \
// RUN: -Xarch_device -fcf-protection=branch \
// RUN: --cuda-gpu-arch=gfx906 %s 2>&1 | FileCheck -check-prefix=DEV %s
// DEV: clang{{.*}} "-fcuda-is-device" {{.*}} "-debug-info-kind={{.*}}" {{.*}} "-fcf-protection=branch"
// DEV: clang{{.*}} "-fcuda-is-device" {{.*}} "-debug-info-kind={{.*}}" {{.*}} "-fcf-protection=branch"
// DEV-NOT: clang{{.*}} {{.*}} "-debug-info-kind={{.*}}"
// RUN: %clang -### -Xarch_host -g -nogpulib --cuda-gpu-arch=gfx900 \
// RUN: --cuda-gpu-arch=gfx906 %s 2>&1 | FileCheck -check-prefix=HOST %s
// HOST-NOT: clang{{.*}} "-fcuda-is-device" {{.*}} "-debug-info-kind={{.*}}"
// HOST-NOT: clang{{.*}} "-fcuda-is-device" {{.*}} "-debug-info-kind={{.*}}"
// HOST: clang{{.*}} "-debug-info-kind={{.*}}"