[CUDA] Handle -O options (more) correctly.

Summary:
Previously we'd crash the driver if you passed -O0.  Now we try to
handle all of clang's various optimization flags in a sane way.

Reviewers: tra

Subscribers: cfe-commits, echristo, jhen

Differential Revision: http://reviews.llvm.org/D16307

llvm-svn: 258174
This commit is contained in:
Justin Lebar 2016-01-19 19:52:21 +00:00
parent 0872e46c9d
commit 2836dcdb75
2 changed files with 49 additions and 5 deletions

View File

@ -10663,10 +10663,35 @@ void NVPTX::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
ArgStringList CmdArgs;
CmdArgs.push_back(TC.getTriple().isArch64Bit() ? "-m64" : "-m32");
// Clang's default optimization level is -O0, but ptxas's default is -O3.
CmdArgs.push_back(Args.MakeArgString(
llvm::Twine("-O") +
Args.getLastArgValue(options::OPT_O_Group, "0").data()));
// Map the -O we received to -O{0,1,2,3}.
//
// TODO: Perhaps we should map host -O2 to ptxas -O3. -O3 is ptxas's default,
// so it may correspond more closely to the spirit of clang -O2.
if (Arg *A = Args.getLastArg(options::OPT_O_Group)) {
// -O3 seems like the least-bad option when -Osomething is specified to
// clang but it isn't handled below.
StringRef OOpt = "3";
if (A->getOption().matches(options::OPT_O4) ||
A->getOption().matches(options::OPT_Ofast))
OOpt = "3";
else if (A->getOption().matches(options::OPT_O0))
OOpt = "0";
else if (A->getOption().matches(options::OPT_O)) {
// -Os, -Oz, and -O(anything else) map to -O2, for lack of better options.
OOpt = llvm::StringSwitch<const char *>(A->getValue())
.Case("1", "1")
.Case("2", "2")
.Case("3", "3")
.Case("s", "2")
.Case("z", "2")
.Default("2");
}
CmdArgs.push_back(Args.MakeArgString(llvm::Twine("-O") + OOpt));
} else {
// If no -O was passed, pass -O0 to ptxas -- no opt flag should correspond
// to no optimizations, but ptxas's default is -O3.
CmdArgs.push_back("-O0");
}
// Don't bother passing -g to ptxas: It's enabled by default at -O0, and
// not supported at other optimization levels.

View File

@ -4,14 +4,31 @@
// REQUIRES: x86-registered-target
// REQUIRES: nvptx-registered-target
// Regular compile with -O2.
// Regular compiles with -O{0,1,2,3,4,fast}. -O4 and -Ofast map to ptxas O3.
// RUN: %clang -### -target x86_64-linux-gnu -O0 -c %s 2>&1 \
// RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM20 -check-prefix OPT0 %s
// RUN: %clang -### -target x86_64-linux-gnu -O1 -c %s 2>&1 \
// RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM20 -check-prefix OPT1 %s
// RUN: %clang -### -target x86_64-linux-gnu -O2 -c %s 2>&1 \
// RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM20 -check-prefix OPT2 %s
// RUN: %clang -### -target x86_64-linux-gnu -O3 -c %s 2>&1 \
// RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM20 -check-prefix OPT3 %s
// RUN: %clang -### -target x86_64-linux-gnu -O4 -c %s 2>&1 \
// RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM20 -check-prefix OPT3 %s
// RUN: %clang -### -target x86_64-linux-gnu -Ofast -c %s 2>&1 \
// RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM20 -check-prefix OPT3 %s
// Regular compile without -O. This should result in us passing -O0 to ptxas.
// RUN: %clang -### -target x86_64-linux-gnu -c %s 2>&1 \
// RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM20 -check-prefix OPT0 %s
// Regular compiles with -Os and -Oz. For lack of a better option, we map
// these to ptxas -O3.
// RUN: %clang -### -target x86_64-linux-gnu -Os -c %s 2>&1 \
// RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM20 -check-prefix OPT2 %s
// RUN: %clang -### -target x86_64-linux-gnu -Oz -c %s 2>&1 \
// RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM20 -check-prefix OPT2 %s
// Regular compile targeting sm_35.
// RUN: %clang -### -target x86_64-linux-gnu --cuda-gpu-arch=sm_35 -c %s 2>&1 \
// RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM35 %s
@ -42,7 +59,9 @@
// ARCH64: "-m64"
// ARCH32: "-m32"
// OPT0: "-O0"
// OPT1: "-O1"
// OPT2: "-O2"
// OPT3: "-O3"
// SM20: "--gpu-name" "sm_20"
// SM35: "--gpu-name" "sm_35"
// SM20: "--output-file" "[[CUBINFILE:[^"]*]]"