forked from OSchip/llvm-project
[CUDA, MemCpyOpt] Add a flag to force-enable memcpyopt and use it for CUDA.
An attempt to enable MemCpyOpt unconditionally in D104801 uncovered the fact that there are users that do not expect LLVM to materialize the `memset` intrinsic. While other passes can do that, too, MemCpyOpt triggers it more frequently and breaks sanitizers and some downstream users. For now, introduce a flag to force-enable the pass and opt in only CUDA compilation with the NVPTX back-end. Differential Revision: https://reviews.llvm.org/D106401
This commit is contained in:
parent
f59f659879
commit
6a9cf21f5a
|
@ -685,7 +685,8 @@ void CudaToolChain::addClangTargetOptions(
|
|||
"Only OpenMP or CUDA offloading kinds are supported for NVIDIA GPUs.");
|
||||
|
||||
if (DeviceOffloadingKind == Action::OFK_Cuda) {
|
||||
CC1Args.push_back("-fcuda-is-device");
|
||||
CC1Args.append(
|
||||
{"-fcuda-is-device", "-mllvm", "-enable-memcpyopt-without-libcalls"});
|
||||
|
||||
if (DriverArgs.hasFlag(options::OPT_fcuda_approx_transcendentals,
|
||||
options::OPT_fno_cuda_approx_transcendentals, false))
|
||||
|
|
|
@ -67,6 +67,10 @@ using namespace llvm;
|
|||
|
||||
#define DEBUG_TYPE "memcpyopt"
|
||||
|
||||
static cl::opt<bool> EnableMemCpyOptWithoutLibcalls(
|
||||
"enable-memcpyopt-without-libcalls", cl::init(false), cl::Hidden,
|
||||
cl::desc("Enable memcpyopt even when libcalls are disabled"));
|
||||
|
||||
static cl::opt<bool>
|
||||
EnableMemorySSA("enable-memcpyopt-memoryssa", cl::init(true), cl::Hidden,
|
||||
cl::desc("Use MemorySSA-backed MemCpyOpt."));
|
||||
|
@ -677,8 +681,9 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
|
|||
// the corresponding libcalls are not available.
|
||||
// TODO: We should really distinguish between libcall availability and
|
||||
// our ability to introduce intrinsics.
|
||||
if (T->isAggregateType() && TLI->has(LibFunc_memcpy) &&
|
||||
TLI->has(LibFunc_memmove)) {
|
||||
if (T->isAggregateType() &&
|
||||
(EnableMemCpyOptWithoutLibcalls ||
|
||||
(TLI->has(LibFunc_memcpy) && TLI->has(LibFunc_memmove)))) {
|
||||
MemoryLocation LoadLoc = MemoryLocation::get(LI);
|
||||
|
||||
// We use alias analysis to check if an instruction may store to
|
||||
|
@ -806,7 +811,7 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
|
|||
// this if the corresponding libfunc is not available.
|
||||
// TODO: We should really distinguish between libcall availability and
|
||||
// our ability to introduce intrinsics.
|
||||
if (!TLI->has(LibFunc_memset))
|
||||
if (!(TLI->has(LibFunc_memset) || EnableMemCpyOptWithoutLibcalls))
|
||||
return false;
|
||||
|
||||
// There are two cases that are interesting for this code to handle: memcpy
|
||||
|
|
|
@ -1,6 +1,8 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||
; RUN: opt -S -memcpyopt < %s | FileCheck %s --check-prefixes=CHECK,LIBCALLS
|
||||
; RUN: opt -S -memcpyopt -mtriple=amdgcn-- < %s | FileCheck %s --check-prefixes=CHECK,NO-LIBCALLS
|
||||
; RUN: opt -S -memcpyopt -mtriple=amdgcn-- -enable-memcpyopt-without-libcalls < %s \
|
||||
; RUN: | FileCheck %s --check-prefixes=CHECK,LIBCALLS
|
||||
|
||||
; REQUIRES: amdgpu-registered-target
|
||||
|
||||
|
|
Loading…
Reference in New Issue