forked from OSchip/llvm-project
[CUDA] Added -f[no-]cuda-short-ptr option
The option enables use of 32-bit pointers for accessing const/local/shared memory. The feature is disabled by default.

Differential Revision: https://reviews.llvm.org/D46148
llvm-svn: 331938
This commit is contained in:
parent
ac3951a735
commit
679dafe69e
|
@ -63,6 +63,10 @@ public:
|
|||
|
||||
/// If given, enables support for __int128_t and __uint128_t types.
|
||||
bool ForceEnableInt128 = false;
|
||||
|
||||
/// \brief If enabled, use 32-bit pointers for accessing const/local/shared
|
||||
/// address space.
|
||||
bool NVPTXUseShortPointers = false;
|
||||
};
|
||||
|
||||
} // end namespace clang
|
||||
|
|
|
@ -581,6 +581,9 @@ def fno_cuda_approx_transcendentals : Flag<["-"], "fno-cuda-approx-transcendenta
|
|||
def fcuda_rdc : Flag<["-"], "fcuda-rdc">, Flags<[CC1Option]>,
|
||||
HelpText<"Generate relocatable device code, also known as separate compilation mode.">;
|
||||
def fno_cuda_rdc : Flag<["-"], "fno-cuda-rdc">;
|
||||
def fcuda_short_ptr : Flag<["-"], "fcuda-short-ptr">, Flags<[CC1Option]>,
|
||||
HelpText<"Use 32-bit pointers for accessing const/local/shared address spaces.">;
|
||||
def fno_cuda_short_ptr : Flag<["-"], "fno-cuda-short-ptr">;
|
||||
def dA : Flag<["-"], "dA">, Group<d_Group>;
|
||||
def dD : Flag<["-"], "dD">, Group<d_Group>, Flags<[CC1Option]>,
|
||||
HelpText<"Print macro definitions in -E mode in addition to normal output">;
|
||||
|
|
|
@ -68,6 +68,9 @@ NVPTXTargetInfo::NVPTXTargetInfo(const llvm::Triple &Triple,
|
|||
|
||||
if (TargetPointerWidth == 32)
|
||||
resetDataLayout("e-p:32:32-i64:64-i128:128-v16:16-v32:32-n16:32:64");
|
||||
else if (Opts.NVPTXUseShortPointers)
|
||||
resetDataLayout(
|
||||
"e-p3:32:32-p4:32:32-p5:32:32-i64:64-i128:128-v16:16-v32:32-n16:32:64");
|
||||
else
|
||||
resetDataLayout("e-i64:64-i128:128-v16:16-v32:32-n16:32:64");
|
||||
|
||||
|
|
|
@ -4714,6 +4714,9 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
|
|||
|
||||
if (Args.hasFlag(options::OPT_fcuda_rdc, options::OPT_fno_cuda_rdc, false))
|
||||
CmdArgs.push_back("-fcuda-rdc");
|
||||
if (Args.hasFlag(options::OPT_fcuda_short_ptr,
|
||||
options::OPT_fno_cuda_short_ptr, false))
|
||||
CmdArgs.push_back("-fcuda-short-ptr");
|
||||
}
|
||||
|
||||
// OpenMP offloading device jobs take the argument -fopenmp-host-ir-file-path
|
||||
|
|
|
@ -635,8 +635,10 @@ void CudaToolChain::addClangTargetOptions(
|
|||
// CUDA-9.0 uses new instructions that are only available in PTX6.0+
|
||||
PtxFeature = "+ptx60";
|
||||
}
|
||||
CC1Args.push_back("-target-feature");
|
||||
CC1Args.push_back(PtxFeature);
|
||||
CC1Args.append({"-target-feature", PtxFeature});
|
||||
if (DriverArgs.hasFlag(options::OPT_fcuda_short_ptr,
|
||||
options::OPT_fno_cuda_short_ptr, false))
|
||||
CC1Args.append({"-mllvm", "--nvptx-short-ptr"});
|
||||
|
||||
if (DeviceOffloadingKind == Action::OFK_OpenMP) {
|
||||
SmallVector<StringRef, 8> LibraryPaths;
|
||||
|
|
|
@ -2922,6 +2922,8 @@ static void ParseTargetArgs(TargetOptions &Opts, ArgList &Args,
|
|||
Opts.Triple = llvm::sys::getDefaultTargetTriple();
|
||||
Opts.OpenCLExtensionsAsWritten = Args.getAllArgValues(OPT_cl_ext_EQ);
|
||||
Opts.ForceEnableInt128 = Args.hasArg(OPT_fforce_enable_int128);
|
||||
Opts.NVPTXUseShortPointers = Args.hasFlag(
|
||||
options::OPT_fcuda_short_ptr, options::OPT_fno_cuda_short_ptr, false);
|
||||
}
|
||||
|
||||
bool CompilerInvocation::CreateFromArgs(CompilerInvocation &Res,
|
||||
|
|
Loading…
Reference in New Issue