diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp index 60fbf6681148..3bd881263565 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp @@ -148,19 +148,35 @@ public: /// a target region. The appropriate mode (SPMD|NON-SPMD) is set on entry /// to the target region and used by containing directives such as 'parallel' /// to emit optimized code. -class ExecutionModeRAII { +class ExecutionRuntimeModesRAII { private: - CGOpenMPRuntimeNVPTX::ExecutionMode SavedMode; - CGOpenMPRuntimeNVPTX::ExecutionMode &Mode; + CGOpenMPRuntimeNVPTX::ExecutionMode SavedExecMode = + CGOpenMPRuntimeNVPTX::EM_Unknown; + CGOpenMPRuntimeNVPTX::ExecutionMode &ExecMode; + bool SavedRuntimeMode = false; + bool *RuntimeMode = nullptr; public: - ExecutionModeRAII(CGOpenMPRuntimeNVPTX::ExecutionMode &Mode, bool IsSPMD) - : Mode(Mode) { - SavedMode = Mode; - Mode = IsSPMD ? CGOpenMPRuntimeNVPTX::EM_SPMD - : CGOpenMPRuntimeNVPTX::EM_NonSPMD; + /// Constructor for Non-SPMD mode. + ExecutionRuntimeModesRAII(CGOpenMPRuntimeNVPTX::ExecutionMode &ExecMode) + : ExecMode(ExecMode) { + SavedExecMode = ExecMode; + ExecMode = CGOpenMPRuntimeNVPTX::EM_NonSPMD; + } + /// Constructor for SPMD mode. + ExecutionRuntimeModesRAII(CGOpenMPRuntimeNVPTX::ExecutionMode &ExecMode, + bool &RuntimeMode, bool FullRuntimeMode) + : ExecMode(ExecMode), RuntimeMode(&RuntimeMode) { + SavedExecMode = ExecMode; + SavedRuntimeMode = RuntimeMode; + ExecMode = CGOpenMPRuntimeNVPTX::EM_SPMD; + RuntimeMode = FullRuntimeMode; + } + ~ExecutionRuntimeModesRAII() { + ExecMode = SavedExecMode; + if (RuntimeMode) + *RuntimeMode = SavedRuntimeMode; } - ~ExecutionModeRAII() { Mode = SavedMode; } }; /// GPU Configuration: This information can be derived from cuda registers, @@ -1187,7 +1203,7 @@ void CGOpenMPRuntimeNVPTX::emitNonSPMDKernel(const OMPExecutableDirective &D, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { - ExecutionModeRAII ModeRAII(CurrentExecutionMode, /*IsSPMD=*/false); + ExecutionRuntimeModesRAII ModeRAII(CurrentExecutionMode); EntryFunctionState EST; WorkerFunctionState WST(CGM, D.getBeginLoc()); Work.clear(); @@ -1319,7 +1335,10 @@ void CGOpenMPRuntimeNVPTX::emitSPMDKernel(const OMPExecutableDirective &D, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { - ExecutionModeRAII ModeRAII(CurrentExecutionMode, /*IsSPMD=*/true); + ExecutionRuntimeModesRAII ModeRAII( + CurrentExecutionMode, RequiresFullRuntime, + CGM.getLangOpts().OpenMPCUDAForceFullRuntime || + !supportsLightweightRuntime(CGM.getContext(), D)); EntryFunctionState EST; // Emit target region as a standalone region. @@ -1370,9 +1389,6 @@ void CGOpenMPRuntimeNVPTX::emitSPMDEntryHeader( llvm::BasicBlock *ExecuteBB = CGF.createBasicBlock(".execute"); EST.ExitBB = CGF.createBasicBlock(".exit"); - // Initialize the OMP state in the runtime; called by all active threads. - bool RequiresFullRuntime = CGM.getLangOpts().OpenMPCUDAForceFullRuntime || - !supportsLightweightRuntime(CGF.getContext(), D); llvm::Value *Args[] = {getThreadLimit(CGF, /*IsInSPMDExecutionMode=*/true), /*RequiresOMPRuntime=*/ Bld.getInt16(RequiresFullRuntime ? 1 : 0), @@ -1919,7 +1935,18 @@ static const ModeFlagsTy UndefinedMode = } // anonymous namespace unsigned CGOpenMPRuntimeNVPTX::getDefaultLocationReserved2Flags() const { - return UndefinedMode; + switch (getExecutionMode()) { + case EM_SPMD: + if (requiresFullRuntime()) + return KMP_IDENT_SPMD_MODE & (~KMP_IDENT_SIMPLE_RT_MODE); + return KMP_IDENT_SPMD_MODE | KMP_IDENT_SIMPLE_RT_MODE; + case EM_NonSPMD: + assert(requiresFullRuntime() && "Expected full runtime."); + return (~KMP_IDENT_SPMD_MODE) & (~KMP_IDENT_SIMPLE_RT_MODE); + case EM_Unknown: + return UndefinedMode; + } + llvm_unreachable("Unknown flags are requested."); } CGOpenMPRuntimeNVPTX::CGOpenMPRuntimeNVPTX(CodeGenModule &CGM) diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h index b03ff782d75e..8ba2759c8b3d 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h +++ b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h @@ -56,6 +56,8 @@ private: ExecutionMode getExecutionMode() const; + bool requiresFullRuntime() const { return RequiresFullRuntime; } + /// Emit the worker function for the current target region. void emitWorkerFunction(WorkerFunctionState &WST); @@ -378,6 +380,9 @@ private: /// to emit optimized code. ExecutionMode CurrentExecutionMode = EM_Unknown; + /// Check if the full runtime is required (default - yes). + bool RequiresFullRuntime = true; + /// true if we're emitting the code for the target region and next parallel /// region is L0 for sure. bool IsInTargetMasterThreadRegion = false; diff --git a/clang/test/OpenMP/nvptx_SPMD_codegen.cpp b/clang/test/OpenMP/nvptx_SPMD_codegen.cpp index 97481e8d6702..738bbf34f725 100644 --- a/clang/test/OpenMP/nvptx_SPMD_codegen.cpp +++ b/clang/test/OpenMP/nvptx_SPMD_codegen.cpp @@ -9,20 +9,40 @@ #define HEADER // CHECK-NOT: @__omp_offloading_{{.+}}_exec_mode = weak constant i8 1 -// CHECK-DAG: private unnamed_addr constant %struct.ident_t { i32 0, i32 2050, i32 2, i32 0, i8* getelementptr inbounds -// CHECK-DAG: private unnamed_addr constant %struct.ident_t { i32 0, i32 514, i32 2, i32 0, i8* getelementptr inbounds -// CHECK-DAG: private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 2, i32 0, i8* getelementptr inbounds -// CHECK-DAG: private unnamed_addr constant %struct.ident_t { i32 0, i32 66, i32 2, i32 0, i8* getelementptr inbounds +// CHECK-DAG: [[DISTR_LIGHT:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2050, i32 3, i32 0, i8* getelementptr inbounds +// CHECK-DAG: [[FOR_LIGHT:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 514, i32 3, i32 0, i8* getelementptr inbounds +// CHECK-DAG: [[LIGHT:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 3, i32 0, i8* getelementptr inbounds +// CHECK-DAG: [[DISTR_FULL:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2050, i32 1, i32 0, i8* getelementptr inbounds +// CHECK-DAG: [[FULL:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 1, i32 0, i8* getelementptr inbounds +// CHECK-DAG: [[BAR_LIGHT:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 66, i32 3, i32 0, i8* getelementptr inbounds +// CHECK-DAG: [[BAR_FULL:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 66, i32 1, i32 0, i8* getelementptr inbounds // CHECK-NOT: @__omp_offloading_{{.+}}_exec_mode = weak constant i8 1 void foo() { // CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0) +// CHECK-DAG: [[DISTR_LIGHT]] +// CHECK-DAG: [[FOR_LIGHT]] +// CHECK-DAG: [[LIGHT]] // CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0) +// CHECK-DAG: [[DISTR_LIGHT]] +// CHECK-DAG: [[FOR_LIGHT]] +// CHECK-DAG: [[LIGHT]] // CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0) +// CHECK-DAG: [[DISTR_LIGHT]] +// CHECK-DAG: [[FOR_LIGHT]] +// CHECK-DAG: [[LIGHT]] // CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK-DAG: [[DISTR_FULL]] +// CHECK-DAG: [[FULL]] // CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK-DAG: [[DISTR_FULL]] +// CHECK-DAG: [[FULL]] // CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK-DAG: [[DISTR_FULL]] +// CHECK-DAG: [[FULL]] // CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK-DAG: [[DISTR_FULL]] +// CHECK-DAG: [[FULL]] #pragma omp target teams distribute parallel for simd for (int i = 0; i < 10; ++i) ; @@ -46,12 +66,29 @@ void foo() { ; int a; // CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0) +// CHECK-DAG: [[DISTR_LIGHT]] +// CHECK-DAG: [[FOR_LIGHT]] +// CHECK-DAG: [[LIGHT]] // CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0) +// CHECK-DAG: [[DISTR_LIGHT]] +// CHECK-DAG: [[FOR_LIGHT]] +// CHECK-DAG: [[LIGHT]] // CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0) +// CHECK-DAG: [[DISTR_LIGHT]] +// CHECK-DAG: [[FOR_LIGHT]] +// CHECK-DAG: [[LIGHT]] // CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK-DAG: [[DISTR_FULL]] +// CHECK-DAG: [[FULL]] // CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK-DAG: [[DISTR_FULL]] +// CHECK-DAG: [[FULL]] // CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK-DAG: [[DISTR_FULL]] +// CHECK-DAG: [[FULL]] // CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK-DAG: [[DISTR_FULL]] +// CHECK-DAG: [[FULL]] #pragma omp target teams distribute parallel for lastprivate(a) for (int i = 0; i < 10; ++i) a = i; @@ -74,12 +111,29 @@ int a; for (int i = 0; i < 10; ++i) ; // CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0) +// CHECK-DAG: [[DISTR_LIGHT]] +// CHECK-DAG: [[FOR_LIGHT]] +// CHECK-DAG: [[LIGHT]] // CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0) +// CHECK-DAG: [[DISTR_LIGHT]] +// CHECK-DAG: [[FOR_LIGHT]] +// CHECK-DAG: [[LIGHT]] // CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0) +// CHECK-DAG: [[DISTR_LIGHT]] +// CHECK-DAG: [[FOR_LIGHT]] +// CHECK-DAG: [[LIGHT]] // CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK-DAG: [[DISTR_FULL]] +// CHECK-DAG: [[FULL]] // CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK-DAG: [[DISTR_FULL]] +// CHECK-DAG: [[FULL]] // CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK-DAG: [[DISTR_FULL]] +// CHECK-DAG: [[FULL]] // CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK-DAG: [[DISTR_FULL]] +// CHECK-DAG: [[FULL]] #pragma omp target teams { int b; @@ -120,12 +174,29 @@ int a; for (int i = 0; i < 10; ++i) ; // CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0) +// CHECK-DAG: [[DISTR_LIGHT]] +// CHECK-DAG: [[FOR_LIGHT]] +// CHECK-DAG: [[LIGHT]] // CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0) +// CHECK-DAG: [[DISTR_LIGHT]] +// CHECK-DAG: [[FOR_LIGHT]] +// CHECK-DAG: [[LIGHT]] // CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0) +// CHECK-DAG: [[DISTR_LIGHT]] +// CHECK-DAG: [[FOR_LIGHT]] +// CHECK-DAG: [[LIGHT]] // CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK-DAG: [[DISTR_FULL]] +// CHECK-DAG: [[FULL]] // CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK-DAG: [[DISTR_FULL]] +// CHECK-DAG: [[FULL]] // CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK-DAG: [[DISTR_FULL]] +// CHECK-DAG: [[FULL]] // CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK-DAG: [[DISTR_FULL]] +// CHECK-DAG: [[FULL]] #pragma omp target teams #pragma omp distribute parallel for for (int i = 0; i < 10; ++i) @@ -155,12 +226,29 @@ int a; for (int i = 0; i < 10; ++i) ; // CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0) +// CHECK-DAG: [[DISTR_LIGHT]] +// CHECK-DAG: [[FOR_LIGHT]] +// CHECK-DAG: [[LIGHT]] // CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0) +// CHECK-DAG: [[DISTR_LIGHT]] +// CHECK-DAG: [[FOR_LIGHT]] +// CHECK-DAG: [[LIGHT]] // CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0) +// CHECK-DAG: [[DISTR_LIGHT]] +// CHECK-DAG: [[FOR_LIGHT]] +// CHECK-DAG: [[LIGHT]] // CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK-DAG: [[DISTR_FULL]] +// CHECK-DAG: [[FULL]] // CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK-DAG: [[DISTR_FULL]] +// CHECK-DAG: [[FULL]] // CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK-DAG: [[DISTR_FULL]] +// CHECK-DAG: [[FULL]] // CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK-DAG: [[DISTR_FULL]] +// CHECK-DAG: [[FULL]] #pragma omp target #pragma omp teams #pragma omp distribute parallel for @@ -197,12 +285,22 @@ int a; for (int i = 0; i < 10; ++i) ; // CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0) +// CHECK-DAG: [[FOR_LIGHT]] +// CHECK-DAG: [[LIGHT]] // CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0) +// CHECK-DAG: [[FOR_LIGHT]] +// CHECK-DAG: [[LIGHT]] // CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0) +// CHECK-DAG: [[FOR_LIGHT]] +// CHECK-DAG: [[LIGHT]] // CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK-DAG: [[FULL]] // CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK-DAG: [[FULL]] // CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK-DAG: [[FULL]] // CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK-DAG: [[FULL]] #pragma omp target parallel for for (int i = 0; i < 10; ++i) ; @@ -225,12 +323,29 @@ int a; for (int i = 0; i < 10; ++i) ; // CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0) +// CHECK-DAG: [[FOR_LIGHT]] +// CHECK-DAG: [[LIGHT]] +// CHECK-DAG: [[BAR_LIGHT]] // CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0) +// CHECK-DAG: [[FOR_LIGHT]] +// CHECK-DAG: [[LIGHT]] +// CHECK-DAG: [[BAR_LIGHT]] // CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0) +// CHECK-DAG: [[FOR_LIGHT]] +// CHECK-DAG: [[LIGHT]] +// CHECK-DAG: [[BAR_LIGHT]] // CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK-DAG: [[FULL]] +// CHECK-DAG: [[BAR_FULL]] // CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK-DAG: [[FULL]] +// CHECK-DAG: [[BAR_FULL]] // CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK-DAG: [[FULL]] +// CHECK-DAG: [[BAR_FULL]] // CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK-DAG: [[FULL]] +// CHECK-DAG: [[BAR_FULL]] #pragma omp target parallel #pragma omp for simd for (int i = 0; i < 10; ++i) @@ -260,12 +375,28 @@ int a; for (int i = 0; i < 10; ++i) ; // CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK-DAG: [[FULL]] +// CHECK-DAG: [[BAR_FULL]] // CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0) +// CHECK-DAG: [[FOR_LIGHT]] +// CHECK-DAG: [[LIGHT]] +// CHECK-DAG: [[BAR_LIGHT]] // CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0) +// CHECK-DAG: [[FOR_LIGHT]] +// CHECK-DAG: [[LIGHT]] +// CHECK-DAG: [[BAR_LIGHT]] // CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK-DAG: [[FULL]] +// CHECK-DAG: [[BAR_FULL]] // CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK-DAG: [[FULL]] +// CHECK-DAG: [[BAR_FULL]] // CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK-DAG: [[FULL]] +// CHECK-DAG: [[BAR_FULL]] // CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK-DAG: [[FULL]] +// CHECK-DAG: [[BAR_FULL]] #pragma omp target #pragma omp parallel #pragma omp for simd ordered @@ -302,12 +433,22 @@ int a; for (int i = 0; i < 10; ++i) ; // CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0) +// CHECK-DAG: [[FOR_LIGHT]] +// CHECK-DAG: [[LIGHT]] // CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0) +// CHECK-DAG: [[FOR_LIGHT]] +// CHECK-DAG: [[LIGHT]] // CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0) +// CHECK-DAG: [[FOR_LIGHT]] +// CHECK-DAG: [[LIGHT]] // CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK-DAG: [[FULL]] // CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK-DAG: [[FULL]] // CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK-DAG: [[FULL]] // CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}}) +// CHECK-DAG: [[FULL]] #pragma omp target #pragma omp parallel for for (int i = 0; i < 10; ++i) diff --git a/clang/test/OpenMP/nvptx_target_codegen.cpp b/clang/test/OpenMP/nvptx_target_codegen.cpp index 5e7cf7f92891..4c17361e44b9 100644 --- a/clang/test/OpenMP/nvptx_target_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_codegen.cpp @@ -9,12 +9,14 @@ #define HEADER // Check that the execution mode of all 6 target regions is set to Generic Mode. -// CHECK-DAG: {{@__omp_offloading_.+l103}}_exec_mode = weak constant i8 1 -// CHECK-DAG: {{@__omp_offloading_.+l180}}_exec_mode = weak constant i8 1 -// CHECK-DAG: {{@__omp_offloading_.+l290}}_exec_mode = weak constant i8 1 -// CHECK-DAG: {{@__omp_offloading_.+l328}}_exec_mode = weak constant i8 1 -// CHECK-DAG: {{@__omp_offloading_.+l346}}_exec_mode = weak constant i8 1 -// CHECK-DAG: {{@__omp_offloading_.+l311}}_exec_mode = weak constant i8 1 +// CHECK-DAG: [[NONSPMD:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds +// CHECK-DAG: [[UNKNOWN:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 2, i32 0, i8* getelementptr inbounds +// CHECK-DAG: {{@__omp_offloading_.+l105}}_exec_mode = weak constant i8 1 +// CHECK-DAG: {{@__omp_offloading_.+l182}}_exec_mode = weak constant i8 1 +// CHECK-DAG: {{@__omp_offloading_.+l292}}_exec_mode = weak constant i8 1 +// CHECK-DAG: {{@__omp_offloading_.+l330}}_exec_mode = weak constant i8 1 +// CHECK-DAG: {{@__omp_offloading_.+l348}}_exec_mode = weak constant i8 1 +// CHECK-DAG: {{@__omp_offloading_.+l313}}_exec_mode = weak constant i8 1 __thread int id; @@ -36,7 +38,7 @@ int foo(int n) { double cn[5][n]; TT d; - // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+foo.+l103}}_worker() + // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+foo.+l105}}_worker() // CHECK-DAG: [[OMP_EXEC_STATUS:%.+]] = alloca i8, // CHECK-DAG: [[OMP_WORK_FN:%.+]] = alloca i8*, // CHECK: store i8* null, i8** [[OMP_WORK_FN]], @@ -67,7 +69,7 @@ int foo(int n) { // CHECK: [[EXIT]] // CHECK: ret void - // CHECK: define {{.*}}void [[T1:@__omp_offloading_.+foo.+l103]]() + // CHECK: define {{.*}}void [[T1:@__omp_offloading_.+foo.+l105]]() // CHECK-DAG: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() // CHECK-DAG: [[NTH:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() // CHECK-DAG: [[WS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() @@ -109,7 +111,7 @@ int foo(int n) { { } - // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+foo.+l180}}_worker() + // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+foo.+l182}}_worker() // CHECK-DAG: [[OMP_EXEC_STATUS:%.+]] = alloca i8, // CHECK-DAG: [[OMP_WORK_FN:%.+]] = alloca i8*, // CHECK: store i8* null, i8** [[OMP_WORK_FN]], @@ -140,7 +142,7 @@ int foo(int n) { // CHECK: [[EXIT]] // CHECK: ret void - // CHECK: define {{.*}}void [[T2:@__omp_offloading_.+foo.+l180]](i[[SZ:32|64]] [[ARG1:%[a-zA-Z_]+]], i[[SZ:32|64]] [[ID:%[a-zA-Z_]+]]) + // CHECK: define {{.*}}void [[T2:@__omp_offloading_.+foo.+l182]](i[[SZ:32|64]] [[ARG1:%[a-zA-Z_]+]], i[[SZ:32|64]] [[ID:%[a-zA-Z_]+]]) // CHECK: [[AA_ADDR:%.+]] = alloca i[[SZ]], // CHECK: store i[[SZ]] [[ARG1]], i[[SZ]]* [[AA_ADDR]], // CHECK: [[AA_CADDR:%.+]] = bitcast i[[SZ]]* [[AA_ADDR]] to i16* @@ -183,7 +185,7 @@ int foo(int n) { id = aa; } - // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+foo.+l290}}_worker() + // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+foo.+l292}}_worker() // CHECK-DAG: [[OMP_EXEC_STATUS:%.+]] = alloca i8, // CHECK-DAG: [[OMP_WORK_FN:%.+]] = alloca i8*, // CHECK: store i8* null, i8** [[OMP_WORK_FN]], @@ -214,7 +216,7 @@ int foo(int n) { // CHECK: [[EXIT]] // CHECK: ret void - // CHECK: define {{.*}}void [[T3:@__omp_offloading_.+foo.+l290]](i[[SZ]] + // CHECK: define {{.*}}void [[T3:@__omp_offloading_.+foo.+l292]](i[[SZ]] // Create local storage for each capture. // CHECK: [[LOCAL_A:%.+]] = alloca i[[SZ]] // CHECK: [[LOCAL_B:%.+]] = alloca [10 x float]* @@ -375,7 +377,7 @@ int baz(int f, double &a) { return f; } - // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+static.+328}}_worker() + // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+static.+330}}_worker() // CHECK-DAG: [[OMP_EXEC_STATUS:%.+]] = alloca i8, // CHECK-DAG: [[OMP_WORK_FN:%.+]] = alloca i8*, // CHECK: store i8* null, i8** [[OMP_WORK_FN]], @@ -406,7 +408,7 @@ int baz(int f, double &a) { // CHECK: [[EXIT]] // CHECK: ret void - // CHECK: define {{.*}}void [[T4:@__omp_offloading_.+static.+l328]](i[[SZ]] + // CHECK: define {{.*}}void [[T4:@__omp_offloading_.+static.+l330]](i[[SZ]] // Create local storage for each capture. // CHECK: [[LOCAL_A:%.+]] = alloca i[[SZ]] // CHECK: [[LOCAL_AA:%.+]] = alloca i[[SZ]] @@ -461,10 +463,10 @@ int baz(int f, double &a) { - // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+S1.+l346}}_worker() + // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+S1.+l348}}_worker() // CHECK-DAG: [[OMP_EXEC_STATUS:%.+]] = alloca i8, // CHECK-DAG: [[OMP_WORK_FN:%.+]] = alloca i8*, - // CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* + // CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[NONSPMD]] // CHECK: store i8* null, i8** [[OMP_WORK_FN]], // CHECK: store i8 0, i8* [[OMP_EXEC_STATUS]], // CHECK: br label {{%?}}[[AWAIT_WORK:.+]] @@ -495,7 +497,7 @@ int baz(int f, double &a) { // CHECK: [[EXIT]] // CHECK: ret void - // CHECK: define {{.*}}void [[T5:@__omp_offloading_.+S1.+l346]]( + // CHECK: define {{.*}}void [[T5:@__omp_offloading_.+S1.+l348]]( // Create local storage for each capture. // CHECK: [[LOCAL_THIS:%.+]] = alloca [[S1:%struct.*]]* // CHECK: [[LOCAL_B:%.+]] = alloca i[[SZ]] @@ -558,8 +560,8 @@ int baz(int f, double &a) { // CHECK: [[LOCAL_F_PTR:%.+]] = alloca i32, // CHECK: [[ZERO_ADDR:%.+]] = alloca i32, // CHECK: store i32 0, i32* [[ZERO_ADDR]] - // CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* - // CHECK: [[PAR_LEVEL:%.+]] = call i16 @__kmpc_parallel_level(%struct.ident_t* @0, i32 [[GTID]]) + // CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[UNKNOWN]] + // CHECK: [[PAR_LEVEL:%.+]] = call i16 @__kmpc_parallel_level(%struct.ident_t* [[UNKNOWN]], i32 [[GTID]]) // CHECK: [[IS_TTD:%.+]] = icmp eq i16 %1, 0 // CHECK: [[RES:%.+]] = call i8 @__kmpc_is_spmd_exec_mode() // CHECK: [[IS_SPMD:%.+]] = icmp ne i8 [[RES]], 0 @@ -584,13 +586,13 @@ int baz(int f, double &a) { // CHECK: icmp ne i8 [[RES]], 0 // CHECK: br i1 - // CHECK: [[RES:%.+]] = call i16 @__kmpc_parallel_level(%struct.ident_t* @{{.+}}, i32 [[GTID]]) + // CHECK: [[RES:%.+]] = call i16 @__kmpc_parallel_level(%struct.ident_t* [[UNKNOWN]], i32 [[GTID]]) // CHECK: icmp ne i16 [[RES]], 0 // CHECK: br i1 - // CHECK: call void @__kmpc_serialized_parallel(%struct.ident_t* @{{.+}}, i32 [[GTID]]) + // CHECK: call void @__kmpc_serialized_parallel(%struct.ident_t* [[UNKNOWN]], i32 [[GTID]]) // CHECK: call void [[OUTLINED:@.+]](i32* [[ZERO_ADDR]], i32* [[ZERO_ADDR]], i32* [[F_PTR]], double* %{{.+}}) - // CHECK: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @{{.+}}, i32 [[GTID]]) + // CHECK: call void @__kmpc_end_serialized_parallel(%struct.ident_t* [[UNKNOWN]], i32 [[GTID]]) // CHECK: br label // CHECK: call void @__kmpc_kernel_prepare_parallel(i8* bitcast (void (i16, i32)* @{{.+}} to i8*), i16 1) @@ -614,7 +616,7 @@ int baz(int f, double &a) { // CHECK: ret i32 [[RES]] - // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l311}}_worker() + // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l313}}_worker() // CHECK-DAG: [[OMP_EXEC_STATUS:%.+]] = alloca i8, // CHECK-DAG: [[OMP_WORK_FN:%.+]] = alloca i8*, // CHECK: store i8* null, i8** [[OMP_WORK_FN]], @@ -645,7 +647,7 @@ int baz(int f, double &a) { // CHECK: [[EXIT]] // CHECK: ret void - // CHECK: define {{.*}}void [[T6:@__omp_offloading_.+template.+l311]](i[[SZ]] + // CHECK: define {{.*}}void [[T6:@__omp_offloading_.+template.+l313]](i[[SZ]] // Create local storage for each capture. // CHECK: [[LOCAL_A:%.+]] = alloca i[[SZ]] // CHECK: [[LOCAL_AA:%.+]] = alloca i[[SZ]] diff --git a/clang/test/OpenMP/nvptx_target_printf_codegen.c b/clang/test/OpenMP/nvptx_target_printf_codegen.c index 098c8e165ff5..a68a9fc8cd9d 100644 --- a/clang/test/OpenMP/nvptx_target_printf_codegen.c +++ b/clang/test/OpenMP/nvptx_target_printf_codegen.c @@ -6,10 +6,11 @@ // expected-no-diagnostics extern int printf(const char *, ...); -// CHECK-DAG: private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 2, i32 0, i8* getelementptr inbounds +// CHECK-DAG: private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds // Check a simple call to printf end-to-end. // CHECK-DAG: [[SIMPLE_PRINTF_TY:%[a-zA-Z0-9_]+]] = type { i32, i64, double } +// CHECK-NOT: private unnamed_addr constant %struct.ident_t { i32 0, i32 2, {{1|2|3}} int CheckSimple() { // CHECK: define {{.*}}void [[T1:@__omp_offloading_.+CheckSimple.+]]_worker() #pragma omp target