[OPENMP] Remove extra sync barriers for 'firstprivate' clause.

Sync barrier will be emitted after generation of firstprivate variables
only if one of the firstprivate vars is used in lastprivate clause.

llvm-svn: 260877
This commit is contained in:
Alexey Bataev 2016-02-15 08:07:17 +00:00
parent 3cc265458f
commit cd8b6a2cf1
5 changed files with 40 additions and 43 deletions

View File

@ -376,14 +376,23 @@ bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
OMPPrivateScope &PrivateScope) {
if (!HaveInsertPoint())
return false;
bool FirstprivateIsLastprivate = false;
llvm::DenseSet<const VarDecl *> Lastprivates;
for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
for (const auto *D : C->varlists())
Lastprivates.insert(
cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl());
}
llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate;
for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
auto IRef = C->varlist_begin();
auto InitsRef = C->inits().begin();
for (auto IInit : C->private_copies()) {
auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
if (EmittedAsFirstprivate.count(OrigVD) == 0) {
EmittedAsFirstprivate.insert(OrigVD);
FirstprivateIsLastprivate =
FirstprivateIsLastprivate ||
(Lastprivates.count(OrigVD->getCanonicalDecl()) > 0);
if (EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()).second) {
auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
auto *VDInit = cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
bool IsRegistered;
@ -443,7 +452,7 @@ bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
++IRef, ++InitsRef;
}
}
return !EmittedAsFirstprivate.empty();
return FirstprivateIsLastprivate && !EmittedAsFirstprivate.empty();
}
void CodeGenFunction::EmitOMPPrivateClause(
@ -490,7 +499,6 @@ bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) {
auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
QualType Type = VD->getType();
if (CopiedVars.insert(VD->getCanonicalDecl()).second) {
// Get the address of the master variable. If we are emitting code with
// TLS support, the address is passed from the master as field in the
// captured declaration.
@ -964,12 +972,11 @@ void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
auto &&CodeGen = [&S](CodeGenFunction &CGF) {
OMPPrivateScope PrivateScope(CGF);
bool Copyins = CGF.EmitOMPCopyinClause(S);
bool Firstprivates = CGF.EmitOMPFirstprivateClause(S, PrivateScope);
if (Copyins || Firstprivates) {
(void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
if (Copyins) {
// Emit implicit barrier to synchronize threads and avoid data races on
// initialization of firstprivate variables or propagation master's thread
// values of threadprivate variables to local instances of that variables
// of all other implicit threads.
// propagation master's thread values of threadprivate variables to local
// instances of that variables of all other implicit threads.
CGF.CGM.getOpenMPRuntime().emitBarrierCall(
CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
/*ForceSimpleCall=*/true);
@ -1605,7 +1612,8 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) {
OMPPrivateScope LoopScope(*this);
if (EmitOMPFirstprivateClause(S, LoopScope)) {
// Emit implicit barrier to synchronize threads and avoid data races on
// initialization of firstprivate variables.
// initialization of firstprivate variables and post-update of
// lastprivate variables.
CGM.getOpenMPRuntime().emitBarrierCall(
*this, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
/*ForceSimpleCall=*/true);
@ -1804,7 +1812,8 @@ CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
CodeGenFunction::OMPPrivateScope LoopScope(CGF);
if (CGF.EmitOMPFirstprivateClause(S, LoopScope)) {
// Emit implicit barrier to synchronize threads and avoid data races on
// initialization of firstprivate variables.
// initialization of firstprivate variables and post-update of lastprivate
// variables.
CGF.CGM.getOpenMPRuntime().emitBarrierCall(
CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
/*ForceSimpleCall=*/true);
@ -1883,8 +1892,7 @@ void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
llvm::SmallVector<const Expr *, 8> SrcExprs;
llvm::SmallVector<const Expr *, 8> AssignmentOps;
// Check if there are any 'copyprivate' clauses associated with this
// 'single'
// construct.
// 'single' construct.
// Build a list of copyprivate variables along with helper expressions
// (<source>, <destination>, <destination>=<source> expressions)
for (const auto *C : S.getClausesOfKind<OMPCopyprivateClause>()) {
@ -1897,10 +1905,9 @@ void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
}
LexicalScope Scope(*this, S.getSourceRange());
// Emit code for 'single' region along with 'copyprivate' clauses
bool HasFirstprivates;
auto &&CodeGen = [&S, &HasFirstprivates](CodeGenFunction &CGF) {
auto &&CodeGen = [&S](CodeGenFunction &CGF) {
CodeGenFunction::OMPPrivateScope SingleScope(CGF);
HasFirstprivates = CGF.EmitOMPFirstprivateClause(S, SingleScope);
(void)CGF.EmitOMPFirstprivateClause(S, SingleScope);
CGF.EmitOMPPrivateClause(S, SingleScope);
(void)SingleScope.Privatize();
@ -1909,10 +1916,9 @@ void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getLocStart(),
CopyprivateVars, DestExprs, SrcExprs,
AssignmentOps);
// Emit an implicit barrier at the end (to avoid data race on firstprivate
// init or if no 'nowait' clause was specified and no 'copyprivate' clause).
if ((!S.getSingleClause<OMPNowaitClause>() || HasFirstprivates) &&
CopyprivateVars.empty()) {
// Emit an implicit barrier at the end (to avoid data race if no 'nowait'
// clause was specified and no 'copyprivate' clause).
if (!S.getSingleClause<OMPNowaitClause>() && CopyprivateVars.empty()) {
CGM.getOpenMPRuntime().emitBarrierCall(
*this, S.getLocStart(),
S.getSingleClause<OMPNowaitClause>() ? OMPD_unknown : OMPD_single);

View File

@ -95,7 +95,7 @@ int main() {
// LAMBDA: [[SIVAR2_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[SIVAR2_PRIVATE_ADDR_REF]]
// LAMBDA: store i{{[0-9]+}} [[SIVAR2_VAL]], i{{[0-9]+}}* [[SIVAR2_PRIVATE_ADDR]]
// LAMBDA: call void @__kmpc_barrier(
// LAMBDA-NOT: call void @__kmpc_barrier(
g = 1;
g1 = 1;
sivar = 2;
@ -158,7 +158,7 @@ int main() {
// BLOCKS: [[SIVAR2_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[SIVAR_REF_ADDRR]]
// BLOCKS: store i{{[0-9]+}} {{.+}}, i{{[0-9]+}}* [[SIVAR2_PRIVATE_ADDR]]
// BLOCKS: call void @__kmpc_barrier(
// BLOCKS-NOT: call void @__kmpc_barrier(
g = 1;
g1 =1;
sivar = 2;
@ -246,7 +246,7 @@ int main() {
// CHECK: store i{{[0-9]+}} [[SIVAR_VAL]], i{{[0-9]+}}* [[SIVAR_PRIV]]
// Synchronization for initialization.
// CHECK: call void @__kmpc_barrier(%{{.+}}* [[IMPLICIT_BARRIER_LOC]], i{{[0-9]+}} [[GTID]])
// CHECK-NOT: call void @__kmpc_barrier(
// CHECK: call void @__kmpc_for_static_init_4(
// CHECK: call void @__kmpc_for_static_fini(
@ -310,10 +310,8 @@ int main() {
// CHECK: call {{.*}} [[S_INT_TY_COPY_CONSTR]]([[S_INT_TY]]* [[VAR_PRIV]], [[S_INT_TY]]* {{.*}} [[VAR_REF]], [[ST_TY]]* [[ST_TY_TEMP]])
// CHECK: call {{.*}} [[ST_TY_DESTR]]([[ST_TY]]* [[ST_TY_TEMP]])
// Synchronization for initialization.
// CHECK: [[GTID_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[GTID_ADDR_ADDR]]
// CHECK: [[GTID:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[GTID_REF]]
// CHECK: call void @__kmpc_barrier(%{{.+}}* [[IMPLICIT_BARRIER_LOC]], i{{[0-9]+}} [[GTID]])
// No synchronization for initialization.
// CHECK-NOT: call void @__kmpc_barrier(
// CHECK: call void @__kmpc_for_static_init_4(
// CHECK: call void @__kmpc_for_static_fini(

View File

@ -31,7 +31,6 @@ struct S {
// CHECK-DAG: [[S_FLOAT_TY:%.+]] = type { float }
// CHECK-DAG: [[S_INT_TY:%.+]] = type { i{{[0-9]+}} }
// CHECK-DAG: [[ST_TY:%.+]] = type { i{{[0-9]+}}, i{{[0-9]+}} }
// CHECK-DAG: [[IMPLICIT_BARRIER_LOC:@.+]] = private unnamed_addr constant %{{.+}} { i32 0, i32 66, i32 0, i32 0, i8*
template <typename T>
T tmain() {
@ -70,7 +69,7 @@ int main() {
// LAMBDA: store i{{[0-9]+}} [[G_VAL]], i{{[0-9]+}}* [[G_PRIVATE_ADDR]], align 128
// LAMBDA: [[SIVAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[SIVAR_REF]]
// LAMBDA: store i{{[0-9]+}} [[SIVAR_VAL]], i{{[0-9]+}}* [[SIVAR_PRIVATE_ADDR]]
// LAMBDA: call {{.*}}void @__kmpc_barrier(
// LAMBDA-NOT: call {{.*}}void @__kmpc_barrier(
g = 1;
sivar = 2;
// LAMBDA: store i{{[0-9]+}} 1, i{{[0-9]+}}* [[G_PRIVATE_ADDR]],
@ -115,7 +114,7 @@ int main() {
// BLOCK: [[SIVAR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SIVAR_REF_ADDR]]
// BLOCKS: [[SIVAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[SIVAR_REF]],
// BLOCKS: store i{{[0-9]+}} [[SIVAR_VAL]], i{{[0-9]+}}* [[SIVAR_PRIVATE_ADDR]],
// BLOCKS: call {{.*}}void @__kmpc_barrier(
// BLOCKS-NOT: call {{.*}}void @__kmpc_barrier(
g = 1;
sivar = 2;
// BLOCKS: store i{{[0-9]+}} 1, i{{[0-9]+}}* [[G_PRIVATE_ADDR]],
@ -244,9 +243,7 @@ int main() {
// CHECK: call {{.*}} [[ST_TY_DEFAULT_CONSTR]]([[ST_TY]]* [[ST_TY_TEMP:%.+]])
// CHECK: call {{.*}} [[S_INT_TY_COPY_CONSTR]]([[S_INT_TY]]* [[VAR_PRIV]], [[S_INT_TY]]* {{.*}} [[VAR_REF]], [[ST_TY]]* [[ST_TY_TEMP]])
// CHECK: call {{.*}} [[ST_TY_DESTR]]([[ST_TY]]* [[ST_TY_TEMP]])
// CHECK: [[GTID_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[GTID_ADDR_ADDR]]
// CHECK: [[GTID:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[GTID_REF]]
// CHECK: call {{.*}}void @__kmpc_barrier(%{{.+}}* [[IMPLICIT_BARRIER_LOC]], i{{[0-9]+}} [[GTID]])
// CHECK-NOT: call {{.*}}void @__kmpc_barrier(
// CHECK-DAG: call {{.*}} [[S_INT_TY_DESTR]]([[S_INT_TY]]* [[VAR_PRIV]])
// CHECK-DAG: call {{.*}} [[S_INT_TY_DESTR]]([[S_INT_TY]]*
// CHECK: ret void

View File

@ -59,7 +59,6 @@ S<float> s_arr[] = {1, 2};
// CHECK-DAG: [[VAR:@.+]] = global [[S_FLOAT_TY]] zeroinitializer,
S<float> var(3);
// CHECK-DAG: [[SIVAR:@.+]] = internal global i{{[0-9]+}} 0,
// CHECK-DAG: [[IMPLICIT_BARRIER_LOC:@.+]] = private unnamed_addr constant %{{.+}} { i32 0, i32 66, i32 0, i32 0, i8*
// CHECK-DAG: [[SECTIONS_BARRIER_LOC:@.+]] = private unnamed_addr constant %{{.+}} { i32 0, i32 194, i32 0, i32 0, i8*
// CHECK: call {{.*}} [[S_FLOAT_TY_DEF_CONSTR:@.+]]([[S_FLOAT_TY]]* [[TEST]])
@ -94,7 +93,7 @@ int main() {
// LAMBDA: [[SIVAR1_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[SIVAR1_REF]]
// LAMBDA: store i{{[0-9]+}} [[SIVAR1_VAL]], i{{[0-9]+}}* [[SIVAR1_PRIVATE_ADDR]]
// LAMBDA: call void @__kmpc_barrier(
// LAMBDA-NOT: call void @__kmpc_barrier(
{
g = 1;
sivar = 10;
@ -154,7 +153,7 @@ int main() {
// BLOCKS: [[SIVAR1_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[SIVAR1_REF]],
// BLOCKS: store i{{[0-9]+}} [[SIVAR1_VAL]], i{{[0-9]+}}* [[SIVAR1_PRIVATE_ADDR]],
// BLOCKS: call void @__kmpc_barrier(
// BLOCKS-NOT: call void @__kmpc_barrier(
{
g = 1;
sivar = 10;
@ -242,7 +241,7 @@ int main() {
// CHECK: [[SIVAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[SIVAR]],
// CHECK: store i{{[0-9]+}} [[SIVAR_VAL]], i{{[0-9]+}}* [[SIVAR_PRIV]],
// CHECK: call void @__kmpc_barrier(%{{.+}}* [[IMPLICIT_BARRIER_LOC]], i{{[0-9]+}} [[GTID]])
// CHECK-NOT: call void @__kmpc_barrier(
// CHECK: call void @__kmpc_for_static_init_4(
// CHECK: call void @__kmpc_for_static_fini(
@ -304,10 +303,8 @@ int main() {
// CHECK: call {{.*}} [[S_INT_TY_COPY_CONSTR]]([[S_INT_TY]]* [[VAR_PRIV]], [[S_INT_TY]]* {{.*}} [[VAR_REF]], [[ST_TY]]* [[ST_TY_TEMP]])
// CHECK: call {{.*}} [[ST_TY_DESTR]]([[ST_TY]]* [[ST_TY_TEMP]])
// Synchronization for initialization.
// CHECK: [[GTID_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[GTID_ADDR_ADDR]]
// CHECK: [[GTID:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[GTID_REF]]
// CHECK: call void @__kmpc_barrier(%{{.+}}* [[IMPLICIT_BARRIER_LOC]], i{{[0-9]+}} [[GTID]])
// No synchronization for initialization.
// CHECK-NOT: call void @__kmpc_barrier(
// CHECK: call void @__kmpc_for_static_init_4(
// CHECK: call void @__kmpc_for_static_fini(

View File

@ -57,7 +57,6 @@ int vec[] = {1, 2};
S<float> s_arr[] = {1, 2};
// CHECK-DAG: [[VAR:@.+]] = global [[S_FLOAT_TY]] zeroinitializer,
S<float> var(3);
// CHECK-DAG: [[IMPLICIT_BARRIER_LOC:@.+]] = private unnamed_addr constant %{{.+}} { i32 0, i32 66, i32 0, i32 0, i8*
// CHECK-DAG: [[SINGLE_BARRIER_LOC:@.+]] = private unnamed_addr constant %{{.+}} { i32 0, i32 322, i32 0, i32 0, i8*
// CHECK: call {{.*}} [[S_FLOAT_TY_DEF_CONSTR:@.+]]([[S_FLOAT_TY]]* [[TEST]])
@ -215,7 +214,7 @@ int main() {
// CHECK-DAG: call {{.*}} [[S_FLOAT_TY_DESTR]]([[S_FLOAT_TY]]*
// CHECK: call void @__kmpc_end_single(
// CHECK: call void @__kmpc_barrier(%{{.+}}* [[IMPLICIT_BARRIER_LOC]], i{{[0-9]+}} [[GTID]])
// CHECK-NOT: call void @__kmpc_barrier(
// CHECK: = call {{.*}}i{{.+}} [[TMAIN_INT:@.+]]()