forked from OSchip/llvm-project
[libomptarget][nvptx] Undef, weak shared variables
[libomptarget][nvptx] Undef, weak shared variables
Shared variables on nvptx, and LDS variables on amdgcn, are uninitialized at the start of kernel execution. Therefore create these variables with an undef initializer instead of zeros, motivated in part by the amdgcn back end rejecting LDS variables that have an initializer.
Common linkage implies zero initialization, which seems incompatible with shared memory. Thus change these variables to weak linkage, following the direction of https://reviews.llvm.org/rG7b3eabdcd215
Reviewed By: jdoerfert
Differential Revision: https://reviews.llvm.org/D90248
This commit is contained in:
parent
afc44efc26
commit
5d02ca49a2
|
@ -1102,7 +1102,7 @@ void CGOpenMPRuntimeGPU::emitNonSPMDKernel(const OMPExecutableDirective &D,
|
|||
KernelStaticGlobalized = new llvm::GlobalVariable(
|
||||
CGM.getModule(), CGM.VoidPtrTy, /*isConstant=*/false,
|
||||
llvm::GlobalValue::InternalLinkage,
|
||||
llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
|
||||
llvm::UndefValue::get(CGM.VoidPtrTy),
|
||||
"_openmp_kernel_static_glob_rd$ptr", /*InsertBefore=*/nullptr,
|
||||
llvm::GlobalValue::NotThreadLocal,
|
||||
CGM.getContext().getTargetAddressSpace(LangAS::cuda_shared));
|
||||
|
@ -1234,7 +1234,7 @@ void CGOpenMPRuntimeGPU::emitSPMDKernel(const OMPExecutableDirective &D,
|
|||
KernelStaticGlobalized = new llvm::GlobalVariable(
|
||||
CGM.getModule(), CGM.VoidPtrTy, /*isConstant=*/false,
|
||||
llvm::GlobalValue::InternalLinkage,
|
||||
llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
|
||||
llvm::UndefValue::get(CGM.VoidPtrTy),
|
||||
"_openmp_kernel_static_glob_rd$ptr", /*InsertBefore=*/nullptr,
|
||||
llvm::GlobalValue::NotThreadLocal,
|
||||
CGM.getContext().getTargetAddressSpace(LangAS::cuda_shared));
|
||||
|
@ -2855,8 +2855,8 @@ static llvm::Value *emitInterWarpCopyFunction(CodeGenModule &CGM,
|
|||
auto *Ty = llvm::ArrayType::get(CGM.Int32Ty, WarpSize);
|
||||
unsigned SharedAddressSpace = C.getTargetAddressSpace(LangAS::cuda_shared);
|
||||
TransferMedium = new llvm::GlobalVariable(
|
||||
M, Ty, /*isConstant=*/false, llvm::GlobalVariable::CommonLinkage,
|
||||
llvm::Constant::getNullValue(Ty), TransferMediumName,
|
||||
M, Ty, /*isConstant=*/false, llvm::GlobalVariable::WeakAnyLinkage,
|
||||
llvm::UndefValue::get(Ty), TransferMediumName,
|
||||
/*InsertBefore=*/nullptr, llvm::GlobalVariable::NotThreadLocal,
|
||||
SharedAddressSpace);
|
||||
CGM.addCompilerUsedGlobal(TransferMedium);
|
||||
|
@ -4791,8 +4791,8 @@ void CGOpenMPRuntimeGPU::clear() {
|
|||
llvm::Type *LLVMStaticTy = CGM.getTypes().ConvertTypeForMem(StaticTy);
|
||||
auto *GV = new llvm::GlobalVariable(
|
||||
CGM.getModule(), LLVMStaticTy,
|
||||
/*isConstant=*/false, llvm::GlobalValue::CommonLinkage,
|
||||
llvm::Constant::getNullValue(LLVMStaticTy),
|
||||
/*isConstant=*/false, llvm::GlobalValue::WeakAnyLinkage,
|
||||
llvm::UndefValue::get(LLVMStaticTy),
|
||||
"_openmp_shared_static_glob_rd_$_", /*InsertBefore=*/nullptr,
|
||||
llvm::GlobalValue::NotThreadLocal,
|
||||
C.getTargetAddressSpace(LangAS::cuda_shared));
|
||||
|
|
|
@ -28,8 +28,8 @@ void test_ds(){
|
|||
}
|
||||
}
|
||||
// SEQ: [[MEM_TY:%.+]] = type { [128 x i8] }
|
||||
// SEQ-DAG: [[SHARED_GLOBAL_RD:@.+]] = common addrspace(3) global [[MEM_TY]] zeroinitializer
|
||||
// SEQ-DAG: [[KERNEL_PTR:@.+]] = internal addrspace(3) global i8* null
|
||||
// SEQ-DAG: [[SHARED_GLOBAL_RD:@.+]] = weak addrspace(3) global [[MEM_TY]] undef
|
||||
// SEQ-DAG: [[KERNEL_PTR:@.+]] = internal addrspace(3) global i8* undef
|
||||
// SEQ-DAG: [[KERNEL_SIZE:@.+]] = internal unnamed_addr constant i64 8
|
||||
// SEQ-DAG: [[KERNEL_SHARED:@.+]] = internal unnamed_addr constant i16 1
|
||||
|
||||
|
|
|
@ -35,8 +35,8 @@ int main(int argc, char **argv) {
|
|||
}
|
||||
|
||||
// SEQ: [[MEM_TY:%.+]] = type { [128 x i8] }
|
||||
// SEQ-DAG: [[SHARED_GLOBAL_RD:@.+]] = common addrspace(3) global [[MEM_TY]] zeroinitializer
|
||||
// SEQ-DAG: [[KERNEL_PTR:@.+]] = internal addrspace(3) global i8* null
|
||||
// SEQ-DAG: [[SHARED_GLOBAL_RD:@.+]] = weak addrspace(3) global [[MEM_TY]] undef
|
||||
// SEQ-DAG: [[KERNEL_PTR:@.+]] = internal addrspace(3) global i8* undef
|
||||
// SEQ-DAG: [[KERNEL_SIZE:@.+]] = internal unnamed_addr constant i{{64|32}} 40
|
||||
// SEQ-DAG: [[KERNEL_SHARED:@.+]] = internal unnamed_addr constant i16 1
|
||||
// CHECK-DAG: @__omp_offloading_{{.*}}_main_[[LINE:l.+]]_exec_mode = weak constant i8 0
|
||||
|
|
|
@ -76,8 +76,8 @@ int bar(int n){
|
|||
}
|
||||
|
||||
// SEQ: [[MEM_TY:%.+]] = type { [128 x i8] }
|
||||
// SEQ-DAG: [[SHARED_GLOBAL_RD:@.+]] = common addrspace(3) global [[MEM_TY]] zeroinitializer
|
||||
// SEQ-DAG: [[KERNEL_PTR:@.+]] = internal addrspace(3) global i8* null
|
||||
// SEQ-DAG: [[SHARED_GLOBAL_RD:@.+]] = weak addrspace(3) global [[MEM_TY]] undef
|
||||
// SEQ-DAG: [[KERNEL_PTR:@.+]] = internal addrspace(3) global i8* undef
|
||||
// SEQ-DAG: [[KERNEL_SIZE:@.+]] = internal unnamed_addr constant i{{64|32}} 4
|
||||
// SEQ-DAG: [[KERNEL_SHARED:@.+]] = internal unnamed_addr constant i16 1
|
||||
|
||||
|
|
|
@ -32,8 +32,8 @@ int bar(int n){
|
|||
}
|
||||
|
||||
// SEQ: [[MEM_TY:%.+]] = type { [128 x i8] }
|
||||
// SEQ-DAG: [[SHARED_GLOBAL_RD:@.+]] = common addrspace(3) global [[MEM_TY]] zeroinitializer
|
||||
// SEQ-DAG: [[KERNEL_PTR:@.+]] = internal addrspace(3) global i8* null
|
||||
// SEQ-DAG: [[SHARED_GLOBAL_RD:@.+]] = weak addrspace(3) global [[MEM_TY]] undef
|
||||
// SEQ-DAG: [[KERNEL_PTR:@.+]] = internal addrspace(3) global i8* undef
|
||||
// SEQ-DAG: [[KERNEL_SIZE:@.+]] = internal unnamed_addr constant i{{64|32}} 4
|
||||
// SEQ-DAG: [[KERNEL_SHARED:@.+]] = internal unnamed_addr constant i16 1
|
||||
|
||||
|
|
|
@ -9,7 +9,7 @@
|
|||
#define HEADER
|
||||
|
||||
// Check for the data transfer medium in shared memory to transfer the reduction list to the first warp.
|
||||
// CHECK-DAG: [[TRANSFER_STORAGE:@.+]] = common addrspace([[SHARED_ADDRSPACE:[0-9]+]]) global [32 x i32]
|
||||
// CHECK-DAG: [[TRANSFER_STORAGE:@.+]] = weak addrspace([[SHARED_ADDRSPACE:[0-9]+]]) global [32 x i32]
|
||||
|
||||
// Check that the execution mode of all 3 target regions is set to Spmd Mode.
|
||||
// CHECK-DAG: {{@__omp_offloading_.+l27}}_exec_mode = weak constant i8 0
|
||||
|
|
|
@ -93,8 +93,8 @@ int bar(int n){
|
|||
}
|
||||
|
||||
// SEQ-DAG: [[MEM_TY:%.+]] = type { [128 x i8] }
|
||||
// SEQ-DAG: [[SHARED_GLOBAL_RD:@.+]] = common addrspace(3) global [[MEM_TY]] zeroinitializer
|
||||
// SEQ-DAG: [[KERNEL_PTR:@.+]] = internal addrspace(3) global i8* null
|
||||
// SEQ-DAG: [[SHARED_GLOBAL_RD:@.+]] = weak addrspace(3) global [[MEM_TY]] undef
|
||||
// SEQ-DAG: [[KERNEL_PTR:@.+]] = internal addrspace(3) global i8* undef
|
||||
// SEQ-DAG: [[KERNEL_SIZE:@.+]] = internal unnamed_addr constant i{{64|32}} 4
|
||||
// SEQ-DAG: [[KERNEL_SHARED:@.+]] = internal unnamed_addr constant i16 1
|
||||
|
||||
|
|
|
@ -76,8 +76,8 @@ int bar(int n){
|
|||
}
|
||||
|
||||
// SEQ-DAG: [[MEM_TY:%.+]] = type { [128 x i8] }
|
||||
// SEQ-DAG: [[SHARED_GLOBAL_RD:@.+]] = common addrspace(3) global [[MEM_TY]] zeroinitializer
|
||||
// SEQ-DAG: [[KERNEL_PTR:@.+]] = internal addrspace(3) global i8* null
|
||||
// SEQ-DAG: [[SHARED_GLOBAL_RD:@.+]] = weak addrspace(3) global [[MEM_TY]] undef
|
||||
// SEQ-DAG: [[KERNEL_PTR:@.+]] = internal addrspace(3) global i8* undef
|
||||
// SEQ-DAG: [[KERNEL_SIZE:@.+]] = internal unnamed_addr constant i{{64|32}} 4
|
||||
// SEQ-DAG: [[KERNEL_SHARED:@.+]] = internal unnamed_addr constant i16 1
|
||||
|
||||
|
|
|
@ -30,8 +30,8 @@ int main (int argc, char **argv) {
|
|||
}
|
||||
|
||||
// SEQ: [[MEM_TY:%.+]] = type { [128 x i8] }
|
||||
// SEQ-DAG: [[SHARED_GLOBAL_RD:@.+]] = common addrspace(3) global [[MEM_TY]] zeroinitializer
|
||||
// SEQ-DAG: [[KERNEL_PTR:@.+]] = internal addrspace(3) global i8* null
|
||||
// SEQ-DAG: [[SHARED_GLOBAL_RD:@.+]] = weak addrspace(3) global [[MEM_TY]] undef
|
||||
// SEQ-DAG: [[KERNEL_PTR:@.+]] = internal addrspace(3) global i8* undef
|
||||
// SEQ-DAG: [[KERNEL_SIZE1:@.+]] = internal unnamed_addr constant i{{64|32}} 4
|
||||
// SEQ-DAG: [[KERNEL_SIZE2:@.+]] = internal unnamed_addr constant i{{64|32}} {{8|4}}
|
||||
// SEQ-DAG: [[KERNEL_SHARED1:@.+]] = internal unnamed_addr constant i16 1
|
||||
|
@ -119,8 +119,8 @@ int main (int argc, char **argv) {
|
|||
}
|
||||
|
||||
// SEQ2: [[MEM_TY:%.+]] = type { [128 x i8] }
|
||||
// SEQ2-DAG: [[SHARED_GLOBAL_RD:@.+]] = common addrspace(3) global [[MEM_TY]] zeroinitializer
|
||||
// SEQ2-DAG: [[KERNEL_PTR:@.+]] = internal addrspace(3) global i8* null
|
||||
// SEQ2-DAG: [[SHARED_GLOBAL_RD:@.+]] = weak addrspace(3) global [[MEM_TY]] undef
|
||||
// SEQ2-DAG: [[KERNEL_PTR:@.+]] = internal addrspace(3) global i8* undef
|
||||
// SEQ2-DAG: [[KERNEL_SIZE1:@.+]] = internal unnamed_addr constant i{{64|32}} 4
|
||||
// SEQ2-DAG: [[KERNEL_SIZE2:@.+]] = internal unnamed_addr constant i{{64|32}} {{8|4}}
|
||||
// SEQ2-DAG: [[KERNEL_SHARED1:@.+]] = internal unnamed_addr constant i16 1
|
||||
|
|
|
@ -17,14 +17,14 @@
|
|||
// CHECK-DAG: [[TEAMS_REDUCE_UNION_TY:%.+]] = type { [[TEAM1_REDUCE_TY]] }
|
||||
// SEQ-DAG: [[MAP_TY:%.+]] = type { [128 x i8] }
|
||||
|
||||
// SEQ-DAG: [[KERNEL_PTR:@.+]] = internal addrspace(3) global i8* null
|
||||
// SEQ-DAG: [[KERNEL_PTR:@.+]] = internal addrspace(3) global i8* undef
|
||||
// SEQ-DAG: [[KERNEL_SHARED1:@.+]] = internal unnamed_addr constant i16 1
|
||||
// SEQ-DAG: [[KERNEL_SHARED2:@.+]] = internal unnamed_addr constant i16 1
|
||||
// SEQ-DAG: [[KERNEL_SIZE1:@.+]] = internal unnamed_addr constant i{{64|32}} {{16|8}}
|
||||
// SEQ-DAG: [[KERNEL_SIZE2:@.+]] = internal unnamed_addr constant i{{64|32}} 16
|
||||
|
||||
// Check for the data transfer medium in shared memory to transfer the reduction list to the first warp.
|
||||
// CHECK-DAG: [[TRANSFER_STORAGE:@.+]] = common addrspace([[SHARED_ADDRSPACE:[0-9]+]]) global [32 x i32]
|
||||
// CHECK-DAG: [[TRANSFER_STORAGE:@.+]] = weak addrspace([[SHARED_ADDRSPACE:[0-9]+]]) global [32 x i32]
|
||||
|
||||
// Check that the execution mode of 2 target regions is set to Non-SPMD and the 3rd is in SPMD.
|
||||
// CHECK-DAG: {{@__omp_offloading_.+l44}}_exec_mode = weak constant i8 1
|
||||
|
|
Loading…
Reference in New Issue