From 89e7e8eb0e18316647ccf1953154ffa29f97f545 Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Wed, 17 Jun 2015 06:21:39 +0000 Subject: [PATCH] [OPENMP] Supported reduction clause in omp simd construct. The following code is generated for reduction clause within 'omp simd' loop construct: #pragma omp simd reduction(op:var) for (...) <body> alloca priv_var priv_var = <initial reduction value>; <loop_start>: <body> // references to original 'var' are replaced by 'priv_var' <loop_end>: var op= priv_var; llvm-svn: 239881 --- clang/lib/CodeGen/CGOpenMPRuntime.cpp | 15 +++++++++++- clang/lib/CodeGen/CGOpenMPRuntime.h | 2 +- clang/lib/CodeGen/CGStmtOpenMP.cpp | 6 ++++- clang/test/OpenMP/simd_codegen.cpp | 33 ++++++++++++++++++++++++--- 4 files changed, 50 insertions(+), 6 deletions(-) diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index 1238accf42d7..269799dffc1c 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -2242,7 +2242,7 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef LHSExprs, ArrayRef RHSExprs, ArrayRef ReductionOps, - bool WithNowait) { + bool WithNowait, bool SimpleReduction) { // Next code should be emitted for reduction: // // static kmp_critical_name lock = { 0 }; @@ -2272,9 +2272,22 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, // break; // default:; // } + // + // if SimpleReduction is true, only the next code is generated: + // ... + // [i] = RedOp(*[i], *[i]); + // ... auto &C = CGM.getContext(); + if (SimpleReduction) { + CodeGenFunction::RunCleanupsScope Scope(CGF); + for (auto *E : ReductionOps) { + CGF.EmitIgnoredExpr(E); + } + return; + } + // 1. Build a list of reduction variables. 
// void *RedList[] = {[0], ..., [-1]}; llvm::APInt ArraySize(/*unsigned int numBits=*/32, RHSExprs.size()); diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h b/clang/lib/CodeGen/CGOpenMPRuntime.h index f5aa4a51df93..4db3db4ae9d2 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.h +++ b/clang/lib/CodeGen/CGOpenMPRuntime.h @@ -632,7 +632,7 @@ public: ArrayRef LHSExprs, ArrayRef RHSExprs, ArrayRef ReductionOps, - bool WithNowait); + bool WithNowait, bool SimpleReduction); /// \brief Emit code for 'taskwait' directive. virtual void emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc); diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index 907fe93efc46..ed960fa858d1 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -443,7 +443,9 @@ void CodeGenFunction::EmitOMPReductionClauseFinal( CGM.getOpenMPRuntime().emitReduction( *this, D.getLocEnd(), LHSExprs, RHSExprs, ReductionOps, D.getSingleClause(OMPC_nowait) || - isOpenMPParallelDirective(D.getDirectiveKind())); + isOpenMPParallelDirective(D.getDirectiveKind()) || + D.getDirectiveKind() == OMPD_simd, + D.getDirectiveKind() == OMPD_simd); } } @@ -807,6 +809,7 @@ void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) { EmitPrivateLoopCounters(CGF, LoopScope, S.counters()); EmitPrivateLinearVars(CGF, S, LoopScope); CGF.EmitOMPPrivateClause(S, LoopScope); + CGF.EmitOMPReductionClauseInit(S, LoopScope); HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope); (void)LoopScope.Privatize(); CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), @@ -820,6 +823,7 @@ void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) { if (HasLastprivateClause) { CGF.EmitOMPLastprivateClauseFinal(S); } + CGF.EmitOMPReductionClauseFinal(S); } CGF.EmitOMPSimdFinal(S); // Emit: if (PreCond) - end. 
diff --git a/clang/test/OpenMP/simd_codegen.cpp b/clang/test/OpenMP/simd_codegen.cpp index 0a5b38a0ce32..4bd06a7fdf24 100644 --- a/clang/test/OpenMP/simd_codegen.cpp +++ b/clang/test/OpenMP/simd_codegen.cpp @@ -185,9 +185,6 @@ void simple(float *a, float *b, float *c, float *d) { // CHECK: store i32 -1, i32* [[A:%.+]], A = -1; #pragma omp simd lastprivate(A) -// Clause 'lastprivate' implementation is not completed yet. -// Test checks that one iteration is separated in presence of lastprivate. -// // CHECK: store i64 0, i64* [[OMP_IV7:%[^,]+]] // CHECK: br label %[[SIMD_LOOP7_COND:[^,]+]] // CHECK: [[SIMD_LOOP7_COND]] @@ -212,6 +209,36 @@ void simple(float *a, float *b, float *c, float *d) { // CHECK: [[SIMPLE_LOOP7_END]] // CHECK-NEXT: [[A_PRIV_VAL:%.+]] = load i32, i32* [[A_PRIV]], // CHECK-NEXT: store i32 [[A_PRIV_VAL]], i32* [[A]], + int R; + // CHECK: store i32 -1, i32* [[R:%.+]], + R = -1; +// CHECK: store i64 0, i64* [[OMP_IV8:%[^,]+]] +// CHECK: store i32 1, i32* [[R_PRIV:%.+]], + #pragma omp simd reduction(*:R) +// CHECK: br label %[[SIMD_LOOP8_COND:[^,]+]] +// CHECK: [[SIMD_LOOP8_COND]] +// CHECK-NEXT: [[IV8:%.+]] = load i64, i64* [[OMP_IV8]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP8_ID:[0-9]+]] +// CHECK-NEXT: [[CMP8:%.+]] = icmp slt i64 [[IV8]], 7 +// CHECK-NEXT: br i1 [[CMP8]], label %[[SIMPLE_LOOP8_BODY:.+]], label %[[SIMPLE_LOOP8_END:[^,]+]] + for (long long i = -10; i < 10; i += 3) { +// CHECK: [[SIMPLE_LOOP8_BODY]] +// Start of body: calculate i from IV: +// CHECK: [[IV8_0:%.+]] = load i64, i64* [[OMP_IV8]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP8_ID]] +// CHECK-NEXT: [[LC_IT_1:%.+]] = mul nsw i64 [[IV8_0]], 3 +// CHECK-NEXT: [[LC_IT_2:%.+]] = add nsw i64 -10, [[LC_IT_1]] +// CHECK-NEXT: store i64 [[LC_IT_2]], i64* [[LC:%[^,]+]],{{.+}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP8_ID]] +// CHECK-NEXT: [[LC_VAL:%.+]] = load i64, i64* [[LC]]{{.+}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP8_ID]] +// CHECK: store i32 %{{.+}}, 
i32* [[R_PRIV]],{{.+}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP8_ID]] + R *= i; +// CHECK: [[IV8_2:%.+]] = load i64, i64* [[OMP_IV8]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP8_ID]] +// CHECK-NEXT: [[ADD8_2:%.+]] = add nsw i64 [[IV8_2]], 1 +// CHECK-NEXT: store i64 [[ADD8_2]], i64* [[OMP_IV8]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP8_ID]] + } +// CHECK: [[SIMPLE_LOOP8_END]] +// CHECK-NEXT: [[R_VAL:%.+]] = load i32, i32* [[R]], +// CHECK-NEXT: [[R_PRIV_VAL:%.+]] = load i32, i32* [[R_PRIV]], +// CHECK-NEXT: [[RED:%.+]] = mul nsw i32 [[R_VAL]], [[R_PRIV_VAL]] +// CHECK-NEXT: store i32 [[RED]], i32* [[R]], // CHECK-NEXT: ret void }