From c98699582a6333bbe76ff7853b4cd6beb45754cf Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Mon, 6 Jul 2020 19:13:37 -0500 Subject: [PATCH] [OpenMP][NFC] Remove unused (always fixed) arguments There are various runtime calls in the device runtime with unused, or always fixed, arguments. This is bad for all sorts of reasons. Clean up two before as we match them in OpenMPOpt now. Reviewed By: JonChesterfield Differential Revision: https://reviews.llvm.org/D83268 --- clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp | 20 ++++++++----------- clang/test/OpenMP/nvptx_data_sharing.cpp | 4 ++-- clang/test/OpenMP/nvptx_parallel_codegen.cpp | 10 +++++----- clang/test/OpenMP/nvptx_target_codegen.cpp | 2 +- .../OpenMP/nvptx_target_teams_codegen.cpp | 4 ++-- .../nvptx_target_teams_distribute_codegen.cpp | 2 +- .../include/llvm/Frontend/OpenMP/OMPKinds.def | 5 +++++ .../deviceRTLs/common/src/parallel.cu | 9 ++------- openmp/libomptarget/deviceRTLs/interface.h | 6 ++---- 9 files changed, 28 insertions(+), 34 deletions(-) diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp index cabd06bd76e8..cbd443134e7a 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp @@ -38,11 +38,9 @@ enum OpenMPRTLFunctionNVPTX { /// Call to void __kmpc_spmd_kernel_deinit_v2(int16_t RequiresOMPRuntime); OMPRTL_NVPTX__kmpc_spmd_kernel_deinit_v2, /// Call to void __kmpc_kernel_prepare_parallel(void - /// *outlined_function, int16_t - /// IsOMPRuntimeInitialized); + /// *outlined_function); OMPRTL_NVPTX__kmpc_kernel_prepare_parallel, - /// Call to bool __kmpc_kernel_parallel(void **outlined_function, - /// int16_t IsOMPRuntimeInitialized); + /// Call to bool __kmpc_kernel_parallel(void **outlined_function); OMPRTL_NVPTX__kmpc_kernel_parallel, /// Call to void __kmpc_kernel_end_parallel(); OMPRTL_NVPTX__kmpc_kernel_end_parallel, @@ -1466,8 +1464,7 @@ void CGOpenMPRuntimeNVPTX::emitWorkerLoop(CodeGenFunction &CGF, CGF.InitTempAlloca(WorkFn, llvm::Constant::getNullValue(CGF.Int8PtrTy)); // TODO: Optimize runtime initialization and pass in correct value. - llvm::Value *Args[] = {WorkFn.getPointer(), - /*RequiresOMPRuntime=*/Bld.getInt16(1)}; + llvm::Value *Args[] = {WorkFn.getPointer()}; llvm::Value *Ret = CGF.EmitRuntimeCall( createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_parallel), Args); Bld.CreateStore(Bld.CreateZExt(Ret, CGF.Int8Ty), ExecStatus); @@ -1595,17 +1592,16 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntimeFunction(unsigned Function) { } case OMPRTL_NVPTX__kmpc_kernel_prepare_parallel: { /// Build void __kmpc_kernel_prepare_parallel( - /// void *outlined_function, int16_t IsOMPRuntimeInitialized); - llvm::Type *TypeParams[] = {CGM.Int8PtrTy, CGM.Int16Ty}; + /// void *outlined_function); + llvm::Type *TypeParams[] = {CGM.Int8PtrTy}; auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_kernel_prepare_parallel"); break; } case OMPRTL_NVPTX__kmpc_kernel_parallel: { - /// Build bool __kmpc_kernel_parallel(void **outlined_function, - /// int16_t IsOMPRuntimeInitialized); - llvm::Type *TypeParams[] = {CGM.Int8PtrPtrTy, CGM.Int16Ty}; + /// Build bool __kmpc_kernel_parallel(void **outlined_function); + llvm::Type *TypeParams[] = {CGM.Int8PtrPtrTy}; llvm::Type *RetTy = CGM.getTypes().ConvertType(CGM.getContext().BoolTy); auto *FnTy = llvm::FunctionType::get(RetTy, TypeParams, /*isVarArg*/ false); @@ -2569,7 +2565,7 @@ void CGOpenMPRuntimeNVPTX::emitNonSPMDParallelCall( llvm::Value *ID = Bld.CreateBitOrPointerCast(WFn, CGM.Int8PtrTy); // Prepare for parallel region. Indicate the outlined function. - llvm::Value *Args[] = {ID, /*RequiresOMPRuntime=*/Bld.getInt16(1)}; + llvm::Value *Args[] = {ID}; CGF.EmitRuntimeCall( createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_prepare_parallel), Args); diff --git a/clang/test/OpenMP/nvptx_data_sharing.cpp b/clang/test/OpenMP/nvptx_data_sharing.cpp index 2ee6bd2b4701..1372246c7fc8 100644 --- a/clang/test/OpenMP/nvptx_data_sharing.cpp +++ b/clang/test/OpenMP/nvptx_data_sharing.cpp @@ -55,7 +55,7 @@ void test_ds(){ // CK1: [[A:%.+]] = getelementptr inbounds %struct._globalized_locals_ty, %struct._globalized_locals_ty* [[GLOBALSTACK2]], i32 0, i32 0 // CK1: [[B:%.+]] = getelementptr inbounds %struct._globalized_locals_ty, %struct._globalized_locals_ty* [[GLOBALSTACK2]], i32 0, i32 1 // CK1: store i32 10, i32* [[A]] -// CK1: call void @__kmpc_kernel_prepare_parallel({{.*}}, i16 1) +// CK1: call void @__kmpc_kernel_prepare_parallel({{.*}}) // CK1: call void @__kmpc_begin_sharing_variables(i8*** [[SHAREDARGS1]], i64 1) // CK1: [[SHARGSTMP1:%.+]] = load i8**, i8*** [[SHAREDARGS1]] // CK1: [[SHARGSTMP2:%.+]] = getelementptr inbounds i8*, i8** [[SHARGSTMP1]], i64 0 @@ -65,7 +65,7 @@ void test_ds(){ // CK1: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) // CK1: call void @__kmpc_end_sharing_variables() // CK1: store i32 100, i32* [[B]] -// CK1: call void @__kmpc_kernel_prepare_parallel({{.*}}, i16 1) +// CK1: call void @__kmpc_kernel_prepare_parallel({{.*}}) // CK1: call void @__kmpc_begin_sharing_variables(i8*** [[SHAREDARGS2]], i64 2) // CK1: [[SHARGSTMP3:%.+]] = load i8**, i8*** [[SHAREDARGS2]] // CK1: [[SHARGSTMP4:%.+]] = getelementptr inbounds i8*, i8** [[SHARGSTMP3]], i64 0 diff --git a/clang/test/OpenMP/nvptx_parallel_codegen.cpp b/clang/test/OpenMP/nvptx_parallel_codegen.cpp index c8b15c8f6e3b..ad25e0d775d1 100644 --- a/clang/test/OpenMP/nvptx_parallel_codegen.cpp +++ b/clang/test/OpenMP/nvptx_parallel_codegen.cpp @@ -92,7 +92,7 @@ int bar(int n){ // // CHECK: [[AWAIT_WORK]] // CHECK: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) #[[#CONVERGENT:]] -// CHECK: [[KPR:%.+]] = call i1 @__kmpc_kernel_parallel(i8** [[OMP_WORK_FN]] +// CHECK: [[KPR:%.+]] = call i1 @__kmpc_kernel_parallel(i8** [[OMP_WORK_FN]]) // CHECK: [[KPRB:%.+]] = zext i1 [[KPR]] to i8 // store i8 [[KPRB]], i8* [[OMP_EXEC_STATUS]], align 1 // CHECK: [[WORK:%.+]] = load i8*, i8** [[OMP_WORK_FN]], @@ -166,13 +166,13 @@ int bar(int n){ // CHECK-DAG: [[MWS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() // CHECK: [[MTMP1:%.+]] = sub nuw i32 [[MNTH]], [[MWS]] // CHECK: call void @__kmpc_kernel_init(i32 [[MTMP1]] -// CHECK: call void @__kmpc_kernel_prepare_parallel(i8* bitcast (void (i16, i32)* [[PARALLEL_FN1]]_wrapper to i8*), +// CHECK: call void @__kmpc_kernel_prepare_parallel(i8* bitcast (void (i16, i32)* [[PARALLEL_FN1]]_wrapper to i8*)) // CHECK: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) // CHECK: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) // CHECK: call void @__kmpc_serialized_parallel( // CHECK: {{call|invoke}} void [[PARALLEL_FN3:@.+]]( // CHECK: call void @__kmpc_end_serialized_parallel( -// CHECK: call void @__kmpc_kernel_prepare_parallel(i8* bitcast (void (i16, i32)* [[PARALLEL_FN2]]_wrapper to i8*), +// CHECK: call void @__kmpc_kernel_prepare_parallel(i8* bitcast (void (i16, i32)* [[PARALLEL_FN2]]_wrapper to i8*)) // CHECK: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) // CHECK: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) // CHECK-64-DAG: load i32, i32* [[REF_A]] @@ -211,7 +211,7 @@ int bar(int n){ // // CHECK: [[AWAIT_WORK]] // CHECK: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) -// CHECK: [[KPR:%.+]] = call i1 @__kmpc_kernel_parallel(i8** [[OMP_WORK_FN]], +// CHECK: [[KPR:%.+]] = call i1 @__kmpc_kernel_parallel(i8** [[OMP_WORK_FN]]) // CHECK: [[KPRB:%.+]] = zext i1 [[KPR]] to i8 // store i8 [[KPRB]], i8* [[OMP_EXEC_STATUS]], align 1 // CHECK: [[WORK:%.+]] = load i8*, i8** [[OMP_WORK_FN]], @@ -291,7 +291,7 @@ int bar(int n){ // CHECK: br i1 [[CMP]], label {{%?}}[[IF_THEN:.+]], label {{%?}}[[IF_ELSE:.+]] // // CHECK: [[IF_THEN]] -// CHECK: call void @__kmpc_kernel_prepare_parallel(i8* bitcast (void (i16, i32)* [[PARALLEL_FN4]]_wrapper to i8*), +// CHECK: call void @__kmpc_kernel_prepare_parallel(i8* bitcast (void (i16, i32)* [[PARALLEL_FN4]]_wrapper to i8*)) // CHECK: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) // CHECK: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) // CHECK: br label {{%?}}[[IF_END:.+]] diff --git a/clang/test/OpenMP/nvptx_target_codegen.cpp b/clang/test/OpenMP/nvptx_target_codegen.cpp index 91f31185d8c1..56f04cb01f0a 100644 --- a/clang/test/OpenMP/nvptx_target_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_codegen.cpp @@ -612,7 +612,7 @@ int baz(int f, double &a) { // CHECK: call void @__kmpc_end_serialized_parallel(%struct.ident_t* [[UNKNOWN]], i32 [[GTID]]) // CHECK: br label -// CHECK: call void @__kmpc_kernel_prepare_parallel(i8* bitcast (void (i16, i32)* @{{.+}} to i8*), i16 1) +// CHECK: call void @__kmpc_kernel_prepare_parallel(i8* bitcast (void (i16, i32)* @{{.+}} to i8*)) // CHECK: call void @__kmpc_begin_sharing_variables(i8*** [[SHARED_PTR:%.+]], i{{64|32}} 2) // CHECK: [[SHARED:%.+]] = load i8**, i8*** [[SHARED_PTR]], // CHECK: [[REF:%.+]] = getelementptr inbounds i8*, i8** [[SHARED]], i{{64|32}} 0 diff --git a/clang/test/OpenMP/nvptx_target_teams_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_codegen.cpp index 3ab955fa8508..8ff393f074e4 100644 --- a/clang/test/OpenMP/nvptx_target_teams_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_teams_codegen.cpp @@ -68,7 +68,7 @@ int bar(int n){ // // CHECK: [[AWAIT_WORK]] // CHECK: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) - // CHECK: [[KPR:%.+]] = call i1 @__kmpc_kernel_parallel(i8** [[OMP_WORK_FN]], i16 1) + // CHECK: [[KPR:%.+]] = call i1 @__kmpc_kernel_parallel(i8** [[OMP_WORK_FN]]) // CHECK: [[KPRB:%.+]] = zext i1 [[KPR]] to i8 // store i8 [[KPRB]], i8* [[OMP_EXEC_STATUS]], align 1 // CHECK: [[WORK:%.+]] = load i8*, i8** [[OMP_WORK_FN]], @@ -154,7 +154,7 @@ int bar(int n){ // // CHECK: [[AWAIT_WORK]] // CHECK: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) - // CHECK: [[KPR:%.+]] = call i1 @__kmpc_kernel_parallel(i8** [[OMP_WORK_FN]], i16 1) + // CHECK: [[KPR:%.+]] = call i1 @__kmpc_kernel_parallel(i8** [[OMP_WORK_FN]]) // CHECK: [[KPRB:%.+]] = zext i1 [[KPR]] to i8 // store i8 [[KPRB]], i8* [[OMP_EXEC_STATUS]], align 1 // CHECK: [[WORK:%.+]] = load i8*, i8** [[OMP_WORK_FN]], diff --git a/clang/test/OpenMP/nvptx_target_teams_distribute_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_distribute_codegen.cpp index fe294bbddf2b..4f23f18730cc 100644 --- a/clang/test/OpenMP/nvptx_target_teams_distribute_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_teams_distribute_codegen.cpp @@ -88,7 +88,7 @@ int bar(int n){ // CHECK: [[I_ADDR:%.+]] = getelementptr inbounds [[GLOB_TY]], [[GLOB_TY]]* [[RD]], i32 0, i32 0 // // CHECK: call void @__kmpc_for_static_init_4( - // CHECK: call void @__kmpc_kernel_prepare_parallel(i8* bitcast (void (i16, i32)* @{{.+}} to i8*), i16 1) + // CHECK: call void @__kmpc_kernel_prepare_parallel(i8* bitcast (void (i16, i32)* @{{.+}} to i8*)) // CHECK: call void @__kmpc_begin_sharing_variables(i8*** [[SHARED_VARS_PTR:%.+]], i{{64|32}} 1) // CHECK: [[SHARED_VARS_BUF:%.+]] = load i8**, i8*** [[SHARED_VARS_PTR]], // CHECK: [[VARS_BUF:%.+]] = getelementptr inbounds i8*, i8** [[SHARED_VARS_BUF]], i{{64|32}} 0 diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def index f286403e657c..bf799a781ae1 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def +++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def @@ -584,6 +584,11 @@ __OMP_RTL(__tgt_push_mapper_component, false, Void, VoidPtr, VoidPtr, VoidPtr, __OMP_RTL(__kmpc_task_allow_completion_event, false, VoidPtr, IdentPtr, /* Int */ Int32, /* kmp_task_t */ VoidPtr) +/// Note that device runtime functions (in the following) do not necessarily +/// need attributes as we expect to see the definitions. +__OMP_RTL(__kmpc_kernel_parallel, false, Int1, VoidPtrPtr) +__OMP_RTL(__kmpc_kernel_prepare_parallel, false, Void, VoidPtr) + __OMP_RTL(__last, false, Void, ) #undef __OMP_RTL diff --git a/openmp/libomptarget/deviceRTLs/common/src/parallel.cu b/openmp/libomptarget/deviceRTLs/common/src/parallel.cu index 4f3c3ac0c08a..20b03e9bab1b 100644 --- a/openmp/libomptarget/deviceRTLs/common/src/parallel.cu +++ b/openmp/libomptarget/deviceRTLs/common/src/parallel.cu @@ -72,10 +72,8 @@ INLINE static uint16_t determineNumberOfThreads(uint16_t NumThreadsClause, } // This routine is always called by the team master.. -EXTERN void __kmpc_kernel_prepare_parallel(void *WorkFn, - int16_t IsOMPRuntimeInitialized) { +EXTERN void __kmpc_kernel_prepare_parallel(void *WorkFn) { PRINT0(LD_IO, "call to __kmpc_kernel_prepare_parallel\n"); - ASSERT0(LT_FUSSY, IsOMPRuntimeInitialized, "Expected initialized runtime."); omptarget_nvptx_workFn = WorkFn; @@ -120,12 +118,9 @@ EXTERN void __kmpc_kernel_prepare_parallel(void *WorkFn, // returns True if this thread is active, else False. // // Only the worker threads call this routine. -EXTERN bool __kmpc_kernel_parallel(void **WorkFn, - int16_t IsOMPRuntimeInitialized) { +EXTERN bool __kmpc_kernel_parallel(void **WorkFn) { PRINT0(LD_IO | LD_PAR, "call to __kmpc_kernel_parallel\n"); - ASSERT0(LT_FUSSY, IsOMPRuntimeInitialized, "Expected initialized runtime."); - // Work function and arguments for L1 parallel region. *WorkFn = omptarget_nvptx_workFn; diff --git a/openmp/libomptarget/deviceRTLs/interface.h b/openmp/libomptarget/deviceRTLs/interface.h index 39ce73cba957..4d352bc648fa 100644 --- a/openmp/libomptarget/deviceRTLs/interface.h +++ b/openmp/libomptarget/deviceRTLs/interface.h @@ -424,10 +424,8 @@ EXTERN void __kmpc_kernel_deinit(int16_t IsOMPRuntimeInitialized); EXTERN void __kmpc_spmd_kernel_init(int ThreadLimit, int16_t RequiresOMPRuntime, int16_t RequiresDataSharing); EXTERN void __kmpc_spmd_kernel_deinit_v2(int16_t RequiresOMPRuntime); -EXTERN void __kmpc_kernel_prepare_parallel(void *WorkFn, - int16_t IsOMPRuntimeInitialized); -EXTERN bool __kmpc_kernel_parallel(void **WorkFn, - int16_t IsOMPRuntimeInitialized); +EXTERN void __kmpc_kernel_prepare_parallel(void *WorkFn); +EXTERN bool __kmpc_kernel_parallel(void **WorkFn); EXTERN void __kmpc_kernel_end_parallel(); EXTERN void __kmpc_data_sharing_init_stack();