[OPENMP50]Support lastprivate conditional updates in inc/dec unary ops.

Added support for checking of updates of variables used in unary
pre(pos) inc/dec expressions.
This commit is contained in:
Alexey Bataev 2020-01-06 16:14:34 -05:00
parent 02f694b69a
commit 7b518dcb29
7 changed files with 117 additions and 32 deletions

View File

@ -1011,6 +1011,9 @@ EmitComplexPrePostIncDec(const UnaryOperator *E, LValue LV,
// Store the updated result through the lvalue.
EmitStoreOfComplex(IncVal, LV, /*init*/ false);
if (getLangOpts().OpenMP)
CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(*this,
E->getSubExpr());
// If this is a postinc, return the value read from memory, otherwise use the
// updated value.

View File

@ -2356,10 +2356,29 @@ llvm::Value *ScalarExprEmitter::EmitIncDecConsiderOverflowBehavior(
llvm_unreachable("Unknown SignedOverflowBehaviorTy");
}
namespace {
/// Handles check and update for lastprivate conditional variables.
class OMPLastprivateConditionalUpdateRAII {
private:
CodeGenFunction &CGF;
const UnaryOperator *E;
public:
OMPLastprivateConditionalUpdateRAII(CodeGenFunction &CGF,
const UnaryOperator *E)
: CGF(CGF), E(E) {}
~OMPLastprivateConditionalUpdateRAII() {
if (CGF.getLangOpts().OpenMP)
CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(
CGF, E->getSubExpr());
}
};
} // namespace
llvm::Value *
ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV,
bool isInc, bool isPre) {
OMPLastprivateConditionalUpdateRAII OMPRegion(CGF, E);
QualType type = E->getSubExpr()->getType();
llvm::PHINode *atomicPHI = nullptr;
llvm::Value *value;

View File

@ -11447,14 +11447,6 @@ CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
OS << "$pl_cond_" << ID.getDevice() << "_" << ID.getFile() << "_"
<< PLoc.getLine() << "_" << PLoc.getColumn() << "$iv";
Data.IVName = OS.str();
// Global loop counter. Required to handle inner parallel-for regions.
// global_iv = &iv;
QualType PtrIVTy = CGM.getContext().getPointerType(IVLVal.getType());
Address GlobIVAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
CGF, PtrIVTy, Data.IVName);
LValue GlobIVLVal = CGF.MakeAddrLValue(GlobIVAddr, PtrIVTy);
CGF.EmitStoreOfScalar(IVLVal.getPointer(CGF), GlobIVLVal);
}
CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
@ -11463,6 +11455,27 @@ CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
}
void CGOpenMPRuntime::initLastprivateConditionalCounter(
CodeGenFunction &CGF, const OMPExecutableDirective &S) {
if (CGM.getLangOpts().OpenMPSimd ||
!llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
[](const OMPLastprivateClause *C) {
return C->getKind() == OMPC_LASTPRIVATE_conditional;
}))
return;
const CGOpenMPRuntime::LastprivateConditionalData &Data =
LastprivateConditionalStack.back();
if (Data.UseOriginalIV)
return;
// Global loop counter. Required to handle inner parallel-for regions.
// global_iv = iv;
Address GlobIVAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
CGF, Data.IVLVal.getType(), Data.IVName);
LValue GlobIVLVal = CGF.MakeAddrLValue(GlobIVAddr, Data.IVLVal.getType());
llvm::Value *IVVal = CGF.EmitLoadOfScalar(Data.IVLVal, S.getBeginLoc());
CGF.EmitStoreOfScalar(IVVal, GlobIVLVal);
}
namespace {
/// Checks if the lastprivate conditional variable is referenced in LHS.
class LastprivateConditionalRefChecker final
@ -11576,10 +11589,9 @@ void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
// Global loop counter. Required to handle inner parallel-for regions.
// global_iv
if (!UseOriginalIV) {
QualType PtrIVTy = CGM.getContext().getPointerType(IVLVal.getType());
Address IVAddr = getAddrOfArtificialThreadPrivate(CGF, PtrIVTy, IVName);
IVLVal =
CGF.EmitLoadOfPointerLValue(IVAddr, PtrIVTy->castAs<PointerType>());
Address IVAddr =
getAddrOfArtificialThreadPrivate(CGF, IVLVal.getType(), IVName);
IVLVal = CGF.MakeAddrLValue(IVAddr, IVLVal.getType());
}
llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, FoundE->getExprLoc());

View File

@ -1716,6 +1716,11 @@ public:
/// current context.
bool isNontemporalDecl(const ValueDecl *VD) const;
/// Initializes global counter for lastprivate conditional.
virtual void
initLastprivateConditionalCounter(CodeGenFunction &CGF,
const OMPExecutableDirective &S);
/// Checks if the provided \p LVal is lastprivate conditional and emits the
/// code to update the value of the original variable.
/// \code

View File

@ -1887,6 +1887,7 @@ void CodeGenFunction::EmitOMPSimdFinal(
static void emitOMPLoopBodyWithStopPoint(CodeGenFunction &CGF,
const OMPLoopDirective &S,
CodeGenFunction::JumpDest LoopExit) {
CGF.CGM.getOpenMPRuntime().initLastprivateConditionalCounter(CGF, S);
CGF.EmitOMPLoopBody(S, LoopExit);
CGF.EmitStopPoint(&S);
}
@ -2007,6 +2008,8 @@ static void emitOMPSimdRegion(CodeGenFunction &CGF, const OMPLoopDirective &S,
CGF.EmitOMPInnerLoop(
S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(),
[&S](CodeGenFunction &CGF) {
CGF.CGM.getOpenMPRuntime().initLastprivateConditionalCounter(
CGF, S);
CGF.EmitOMPLoopBody(S, CodeGenFunction::JumpDest());
CGF.EmitStopPoint(&S);
},
@ -2661,6 +2664,8 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(
: S.getCond(),
StaticChunkedOne ? S.getDistInc() : S.getInc(),
[&S, LoopExit](CodeGenFunction &CGF) {
CGF.CGM.getOpenMPRuntime()
.initLastprivateConditionalCounter(CGF, S);
CGF.EmitOMPLoopBody(S, LoopExit);
CGF.EmitStopPoint(&S);
},
@ -2843,6 +2848,7 @@ void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
// break;
// }
// .omp.sections.exit:
CGF.CGM.getOpenMPRuntime().initLastprivateConditionalCounter(CGF, S);
llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".omp.sections.exit");
llvm::SwitchInst *SwitchStmt =
CGF.Builder.CreateSwitch(CGF.EmitLoadOfScalar(IV, S.getBeginLoc()),

View File

@ -176,7 +176,7 @@ char cnt;
// CHECK-DAG: [[X:@.+]] = global double 0.0
// CHECK-DAG: [[F:@.+]] = global float 0.0
// CHECK-DAG: [[CNT:@.+]] = global i8 0
// OMP50-DAG: [[IV_REF:@.+]] = {{.*}}common global i32* null
// OMP50-DAG: [[IV_REF:@.+]] = {{.*}}common global i32 0
// OMP50-DAG: [[LAST_IV_F:@.+]] = {{.*}}common global i32 0
// OMP50-DAG: [[LAST_F:@.+]] = {{.*}}common global float 0.000000e+00,
@ -663,9 +663,6 @@ int main() {
// CHECK: [[GTID_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[GTID_ADDR_REF]]
// CHECK: [[GTID:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[GTID_REF]]
// OMP50: [[LOCAL_IV_REF:%.+]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @{{.+}}, i32 [[GTID]], i8* bitcast (i32** [[IV_REF]] to i8*), i64 8, i8*** @{{.+}})
// OMP50: [[BC:%.+]] = bitcast i8* [[LOCAL_IV_REF]] to i32**
// OMP50: store i32* %{{.+}}, i32** [[BC]],
// CHECK: [[CNT_PRIV:%.+]] = call i8* @__kmpc_alloc(i32 [[GTID]], i64 1, i8* inttoptr (i64 3 to i8*))
// CHECK: call {{.+}} @__kmpc_for_static_init_4(%{{.+}}* @{{.+}}, i32 [[GTID]], i32 34, i32* [[IS_LAST_ADDR:%.+]], i32* [[OMP_LB:%[^,]+]], i32* [[OMP_UB:%[^,]+]], i32* [[OMP_ST:%[^,]+]], i32 1, i32 1)
// UB = min(UB, GlobalUB)
@ -677,11 +674,13 @@ int main() {
// CHECK-NEXT: [[LB:%.+]] = load i32, i32* [[OMP_LB]]
// CHECK-NEXT: store i32 [[LB]], i32* [[OMP_IV:[^,]+]]
// <Skip loop body>
// OMP50: [[LOCAL_IV_REF:%.+]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @{{.+}}, i32 [[GTID]], i8* bitcast (i32* [[IV_REF]] to i8*), i64 4, i8*** @{{.+}})
// OMP50: [[BC:%.+]] = bitcast i8* [[LOCAL_IV_REF]] to i32*
// OMP50: store i32 %{{.+}}, i32* [[BC]],
// CHECK: store float 0.000000e+00, float* [[F_PRIV:%.+]],
// OMP50: [[LOCAL_IV_REF:%.+]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @{{.+}}, i32 [[GTID]], i8* bitcast (i32** [[IV_REF]] to i8*), i64 8, i8*** @{{.+}})
// OMP50: [[BC:%.+]] = bitcast i8* [[LOCAL_IV_REF]] to i32**
// OMP50: [[IV_ADDR:%.+]] = load i32*, i32** [[BC]],
// OMP50: [[IV:%.+]] = load i32, i32* [[IV_ADDR]],
// OMP50: [[LOCAL_IV_REF:%.+]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @{{.+}}, i32 [[GTID]], i8* bitcast (i32* [[IV_REF]] to i8*), i64 4, i8*** @{{.+}})
// OMP50: [[BC:%.+]] = bitcast i8* [[LOCAL_IV_REF]] to i32*
// OMP50: [[IV:%.+]] = load i32, i32* [[BC]],
// OMP50: call void @__kmpc_critical(%struct.ident_t* @{{.+}}, i32 [[GTID]], [8 x i32]* [[F_REGION:@.+]])
// OMP50: [[LAST_IV:%.+]] = load i32, i32* [[LAST_IV_F]],
// OMP50: [[CMP:%.+]] = icmp sle i32 [[LAST_IV]], [[IV]]

View File

@ -1,19 +1,35 @@
// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck %s
// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-apple-darwin10 -emit-pch -o %t %s
// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -DLAMBDA -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck -check-prefix=LAMBDA %s
// RUN: %clang_cc1 -verify -fopenmp -x c++ -fblocks -DBLOCKS -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck -check-prefix=BLOCKS %s
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck %s
// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple x86_64-apple-darwin10 -emit-pch -o %t %s
// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple x86_64-apple-darwin10 -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -DLAMBDA -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck -check-prefix=LAMBDA %s
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -fblocks -DBLOCKS -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck -check-prefix=BLOCKS %s
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -DOMP5 -x c++ -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=OMP50
// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -DOMP5 -x c++ -std=c++11 -triple x86_64-apple-darwin10 -emit-pch -o %t %s
// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -DOMP5 -x c++ -triple x86_64-apple-darwin10 -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -check-prefix=CHECK -check-prefix=OMP50
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -DOMP5 -x c++ -std=c++11 -DLAMBDA -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck -check-prefix=LAMBDA %s
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -DOMP5 -x c++ -fblocks -DBLOCKS -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck -check-prefix=BLOCKS %s
// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple x86_64-apple-darwin10 -emit-pch -o %t %s
// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple x86_64-apple-darwin10 -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -DLAMBDA -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -fblocks -DBLOCKS -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple x86_64-apple-darwin10 -emit-pch -o %t %s
// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -triple x86_64-apple-darwin10 -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -DLAMBDA -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -fblocks -DBLOCKS -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -DOMP5 -x c++ -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -DOMP5 -x c++ -std=c++11 -triple x86_64-apple-darwin10 -emit-pch -o %t %s
// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -DOMP5 -x c++ -triple x86_64-apple-darwin10 -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -DOMP5 -x c++ -std=c++11 -DLAMBDA -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -DOMP5 -x c++ -fblocks -DBLOCKS -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
// expected-no-diagnostics
#ifndef HEADER
#define HEADER
#ifdef OMP5
#define CONDITIONAL conditional :
#else
#define CONDITIONAL
#endif //OMP5
template <class T>
struct S {
T f;
@ -30,6 +46,9 @@ volatile int g = 1212;
// CHECK: [[S_INT_TY:%.+]] = type { i32 }
// CHECK-DAG: [[SECTIONS_BARRIER_LOC:@.+]] = private unnamed_addr global %{{.+}} { i32 0, i32 194, i32 0, i32 0, i8*
// CHECK-DAG: [[X:@.+]] = global double 0.0
// OMP50-DAG: [[IV_REF:@.+]] = common global i32 0
// OMP50-DAG: [[LAST_IV_X:@.+]] = {{.*}}common global i32 0
// OMP50-DAG: [[LAST_X:@.+]] = {{.*}}common global double 0.000000e+00,
template <typename T>
T tmain() {
S<T> test;
@ -216,7 +235,7 @@ int main() {
}
}
#pragma omp parallel
#pragma omp sections lastprivate(A::x, B::x)
#pragma omp sections lastprivate(CONDITIONAL A::x, B::x)
{
A::x++;
#pragma omp section
@ -275,15 +294,37 @@ int main() {
// CHECK: [[GTID:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[GTID_REF]]
// CHECK: call void @__kmpc_for_static_init_4(%{{.+}}* @{{.+}}, i32 [[GTID]], i32 34, i32* [[IS_LAST_ADDR:%.+]], i32* %{{.+}}, i32* %{{.+}}, i32* %{{.+}}, i32 1, i32 1)
// <Skip loop body>
// OMP50: [[IV_GLOB_REF:%.+]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @{{.+}}, i32 [[GTID]], i8* bitcast (i32* [[IV_REF]] to i8*), i64 4, i8*** @{{.+}})
// OMP50: [[BC:%.+]] = bitcast i8* [[IV_GLOB_REF]] to i32*
// OMP50: store i32 %{{.+}}, i32* [[BC]],
// OMP50: [[LOCAL_IV_REF:%.+]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @{{.+}}, i32 [[GTID]], i8* bitcast (i32* [[IV_REF]] to i8*), i64 4, i8*** @{{.+}})
// OMP50: [[BC:%.+]] = bitcast i8* [[LOCAL_IV_REF]] to i32*
// OMP50: [[IV:%.+]] = load i32, i32* [[BC]],
// OMP50: call void @__kmpc_critical(%struct.ident_t* @{{.+}}, i32 [[GTID]], [8 x i32]* [[X_REGION:@.+]])
// OMP50: [[LAST_IV:%.+]] = load i32, i32* [[LAST_IV_X]],
// OMP50: [[CMP:%.+]] = icmp sle i32 [[LAST_IV]], [[IV]]
// OMP50: br i1 [[CMP]], label %[[LP_THEN:.+]], label %[[LP_DONE:[^,]+]]
// OMP50: [[LP_THEN]]:
// OMP50: store i32 [[IV]], i32* [[LAST_IV_X]],
// OMP50: [[X_VAL:%.+]] = load double, double* [[X_PRIV]],
// OMP50: store double [[X_VAL]], double* [[LAST_X]],
// OMP50: br label %[[LP_DONE]]
// OMP50: [[LP_DONE]]:
// OMP50: call void @__kmpc_end_critical(%struct.ident_t* @{{.+}}, i32 [[GTID]], [8 x i32]* [[X_REGION]])
// CHECK: call void @__kmpc_for_static_fini(%{{.+}}* @{{.+}}, i32 [[GTID]])
// Check for final copying of private values back to original vars.
// CHECK: [[IS_LAST_VAL:%.+]] = load i32, i32* [[IS_LAST_ADDR]],
// CHECK: [[IS_LAST_ITER:%.+]] = icmp ne i32 [[IS_LAST_VAL]], 0
// OMP50-NEXT: call void @__kmpc_barrier(%{{.+}}* @{{.+}}, i{{[0-9]+}} [[GTID]])
// CHECK: br i1 [[IS_LAST_ITER:%.+]], label %[[LAST_THEN:.+]], label %[[LAST_DONE:.+]]
// CHECK: [[LAST_THEN]]
// Actual copying.
// OMP50: [[X_VAL:%.+]] = load double, double* [[LAST_X]],
// OMP50: store double [[X_VAL]], double* [[X_PRIV]],
// original x=private_x;
// CHECK: [[X_VAL:%.+]] = load double, double* [[X_PRIV]],
// CHECK: store double [[X_VAL]], double* [[X]],