forked from OSchip/llvm-project
[OPENMP] Remove extra sync barriers for 'firstprivate' clause.
A sync barrier is now emitted after the initialization of firstprivate variables only if at least one of those variables also appears in a lastprivate clause. llvm-svn: 260877
This commit is contained in:
parent
3cc265458f
commit
cd8b6a2cf1
|
@ -376,14 +376,23 @@ bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
|
|||
OMPPrivateScope &PrivateScope) {
|
||||
if (!HaveInsertPoint())
|
||||
return false;
|
||||
bool FirstprivateIsLastprivate = false;
|
||||
llvm::DenseSet<const VarDecl *> Lastprivates;
|
||||
for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
|
||||
for (const auto *D : C->varlists())
|
||||
Lastprivates.insert(
|
||||
cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl());
|
||||
}
|
||||
llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate;
|
||||
for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
|
||||
auto IRef = C->varlist_begin();
|
||||
auto InitsRef = C->inits().begin();
|
||||
for (auto IInit : C->private_copies()) {
|
||||
auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
|
||||
if (EmittedAsFirstprivate.count(OrigVD) == 0) {
|
||||
EmittedAsFirstprivate.insert(OrigVD);
|
||||
FirstprivateIsLastprivate =
|
||||
FirstprivateIsLastprivate ||
|
||||
(Lastprivates.count(OrigVD->getCanonicalDecl()) > 0);
|
||||
if (EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()).second) {
|
||||
auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
|
||||
auto *VDInit = cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
|
||||
bool IsRegistered;
|
||||
|
@ -443,7 +452,7 @@ bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
|
|||
++IRef, ++InitsRef;
|
||||
}
|
||||
}
|
||||
return !EmittedAsFirstprivate.empty();
|
||||
return FirstprivateIsLastprivate && !EmittedAsFirstprivate.empty();
|
||||
}
|
||||
|
||||
void CodeGenFunction::EmitOMPPrivateClause(
|
||||
|
@ -490,7 +499,6 @@ bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) {
|
|||
auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
|
||||
QualType Type = VD->getType();
|
||||
if (CopiedVars.insert(VD->getCanonicalDecl()).second) {
|
||||
|
||||
// Get the address of the master variable. If we are emitting code with
|
||||
// TLS support, the address is passed from the master as field in the
|
||||
// captured declaration.
|
||||
|
@ -964,12 +972,11 @@ void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
|
|||
auto &&CodeGen = [&S](CodeGenFunction &CGF) {
|
||||
OMPPrivateScope PrivateScope(CGF);
|
||||
bool Copyins = CGF.EmitOMPCopyinClause(S);
|
||||
bool Firstprivates = CGF.EmitOMPFirstprivateClause(S, PrivateScope);
|
||||
if (Copyins || Firstprivates) {
|
||||
(void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
|
||||
if (Copyins) {
|
||||
// Emit implicit barrier to synchronize threads and avoid data races on
|
||||
// initialization of firstprivate variables or propagation master's thread
|
||||
// values of threadprivate variables to local instances of that variables
|
||||
// of all other implicit threads.
|
||||
// propagation master's thread values of threadprivate variables to local
|
||||
// instances of that variables of all other implicit threads.
|
||||
CGF.CGM.getOpenMPRuntime().emitBarrierCall(
|
||||
CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
|
||||
/*ForceSimpleCall=*/true);
|
||||
|
@ -1605,7 +1612,8 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) {
|
|||
OMPPrivateScope LoopScope(*this);
|
||||
if (EmitOMPFirstprivateClause(S, LoopScope)) {
|
||||
// Emit implicit barrier to synchronize threads and avoid data races on
|
||||
// initialization of firstprivate variables.
|
||||
// initialization of firstprivate variables and post-update of
|
||||
// lastprivate variables.
|
||||
CGM.getOpenMPRuntime().emitBarrierCall(
|
||||
*this, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
|
||||
/*ForceSimpleCall=*/true);
|
||||
|
@ -1804,7 +1812,8 @@ CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
|
|||
CodeGenFunction::OMPPrivateScope LoopScope(CGF);
|
||||
if (CGF.EmitOMPFirstprivateClause(S, LoopScope)) {
|
||||
// Emit implicit barrier to synchronize threads and avoid data races on
|
||||
// initialization of firstprivate variables.
|
||||
// initialization of firstprivate variables and post-update of lastprivate
|
||||
// variables.
|
||||
CGF.CGM.getOpenMPRuntime().emitBarrierCall(
|
||||
CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
|
||||
/*ForceSimpleCall=*/true);
|
||||
|
@ -1883,8 +1892,7 @@ void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
|
|||
llvm::SmallVector<const Expr *, 8> SrcExprs;
|
||||
llvm::SmallVector<const Expr *, 8> AssignmentOps;
|
||||
// Check if there are any 'copyprivate' clauses associated with this
|
||||
// 'single'
|
||||
// construct.
|
||||
// 'single' construct.
|
||||
// Build a list of copyprivate variables along with helper expressions
|
||||
// (<source>, <destination>, <destination>=<source> expressions)
|
||||
for (const auto *C : S.getClausesOfKind<OMPCopyprivateClause>()) {
|
||||
|
@ -1897,10 +1905,9 @@ void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
|
|||
}
|
||||
LexicalScope Scope(*this, S.getSourceRange());
|
||||
// Emit code for 'single' region along with 'copyprivate' clauses
|
||||
bool HasFirstprivates;
|
||||
auto &&CodeGen = [&S, &HasFirstprivates](CodeGenFunction &CGF) {
|
||||
auto &&CodeGen = [&S](CodeGenFunction &CGF) {
|
||||
CodeGenFunction::OMPPrivateScope SingleScope(CGF);
|
||||
HasFirstprivates = CGF.EmitOMPFirstprivateClause(S, SingleScope);
|
||||
(void)CGF.EmitOMPFirstprivateClause(S, SingleScope);
|
||||
CGF.EmitOMPPrivateClause(S, SingleScope);
|
||||
(void)SingleScope.Privatize();
|
||||
|
||||
|
@ -1909,10 +1916,9 @@ void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
|
|||
CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getLocStart(),
|
||||
CopyprivateVars, DestExprs, SrcExprs,
|
||||
AssignmentOps);
|
||||
// Emit an implicit barrier at the end (to avoid data race on firstprivate
|
||||
// init or if no 'nowait' clause was specified and no 'copyprivate' clause).
|
||||
if ((!S.getSingleClause<OMPNowaitClause>() || HasFirstprivates) &&
|
||||
CopyprivateVars.empty()) {
|
||||
// Emit an implicit barrier at the end (to avoid data race if no 'nowait'
|
||||
// clause was specified and no 'copyprivate' clause).
|
||||
if (!S.getSingleClause<OMPNowaitClause>() && CopyprivateVars.empty()) {
|
||||
CGM.getOpenMPRuntime().emitBarrierCall(
|
||||
*this, S.getLocStart(),
|
||||
S.getSingleClause<OMPNowaitClause>() ? OMPD_unknown : OMPD_single);
|
||||
|
|
|
@ -95,7 +95,7 @@ int main() {
|
|||
// LAMBDA: [[SIVAR2_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[SIVAR2_PRIVATE_ADDR_REF]]
|
||||
// LAMBDA: store i{{[0-9]+}} [[SIVAR2_VAL]], i{{[0-9]+}}* [[SIVAR2_PRIVATE_ADDR]]
|
||||
|
||||
// LAMBDA: call void @__kmpc_barrier(
|
||||
// LAMBDA-NOT: call void @__kmpc_barrier(
|
||||
g = 1;
|
||||
g1 = 1;
|
||||
sivar = 2;
|
||||
|
@ -158,7 +158,7 @@ int main() {
|
|||
// BLOCKS: [[SIVAR2_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[SIVAR_REF_ADDRR]]
|
||||
// BLOCKS: store i{{[0-9]+}} {{.+}}, i{{[0-9]+}}* [[SIVAR2_PRIVATE_ADDR]]
|
||||
|
||||
// BLOCKS: call void @__kmpc_barrier(
|
||||
// BLOCKS-NOT: call void @__kmpc_barrier(
|
||||
g = 1;
|
||||
g1 =1;
|
||||
sivar = 2;
|
||||
|
@ -246,7 +246,7 @@ int main() {
|
|||
// CHECK: store i{{[0-9]+}} [[SIVAR_VAL]], i{{[0-9]+}}* [[SIVAR_PRIV]]
|
||||
|
||||
// Synchronization for initialization.
|
||||
// CHECK: call void @__kmpc_barrier(%{{.+}}* [[IMPLICIT_BARRIER_LOC]], i{{[0-9]+}} [[GTID]])
|
||||
// CHECK-NOT: call void @__kmpc_barrier(
|
||||
|
||||
// CHECK: call void @__kmpc_for_static_init_4(
|
||||
// CHECK: call void @__kmpc_for_static_fini(
|
||||
|
@ -310,10 +310,8 @@ int main() {
|
|||
// CHECK: call {{.*}} [[S_INT_TY_COPY_CONSTR]]([[S_INT_TY]]* [[VAR_PRIV]], [[S_INT_TY]]* {{.*}} [[VAR_REF]], [[ST_TY]]* [[ST_TY_TEMP]])
|
||||
// CHECK: call {{.*}} [[ST_TY_DESTR]]([[ST_TY]]* [[ST_TY_TEMP]])
|
||||
|
||||
// Synchronization for initialization.
|
||||
// CHECK: [[GTID_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[GTID_ADDR_ADDR]]
|
||||
// CHECK: [[GTID:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[GTID_REF]]
|
||||
// CHECK: call void @__kmpc_barrier(%{{.+}}* [[IMPLICIT_BARRIER_LOC]], i{{[0-9]+}} [[GTID]])
|
||||
// No synchronization for initialization.
|
||||
// CHECK-NOT: call void @__kmpc_barrier(
|
||||
|
||||
// CHECK: call void @__kmpc_for_static_init_4(
|
||||
// CHECK: call void @__kmpc_for_static_fini(
|
||||
|
|
|
@ -31,7 +31,6 @@ struct S {
|
|||
// CHECK-DAG: [[S_FLOAT_TY:%.+]] = type { float }
|
||||
// CHECK-DAG: [[S_INT_TY:%.+]] = type { i{{[0-9]+}} }
|
||||
// CHECK-DAG: [[ST_TY:%.+]] = type { i{{[0-9]+}}, i{{[0-9]+}} }
|
||||
// CHECK-DAG: [[IMPLICIT_BARRIER_LOC:@.+]] = private unnamed_addr constant %{{.+}} { i32 0, i32 66, i32 0, i32 0, i8*
|
||||
|
||||
template <typename T>
|
||||
T tmain() {
|
||||
|
@ -70,7 +69,7 @@ int main() {
|
|||
// LAMBDA: store i{{[0-9]+}} [[G_VAL]], i{{[0-9]+}}* [[G_PRIVATE_ADDR]], align 128
|
||||
// LAMBDA: [[SIVAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[SIVAR_REF]]
|
||||
// LAMBDA: store i{{[0-9]+}} [[SIVAR_VAL]], i{{[0-9]+}}* [[SIVAR_PRIVATE_ADDR]]
|
||||
// LAMBDA: call {{.*}}void @__kmpc_barrier(
|
||||
// LAMBDA-NOT: call {{.*}}void @__kmpc_barrier(
|
||||
g = 1;
|
||||
sivar = 2;
|
||||
// LAMBDA: store i{{[0-9]+}} 1, i{{[0-9]+}}* [[G_PRIVATE_ADDR]],
|
||||
|
@ -115,7 +114,7 @@ int main() {
|
|||
// BLOCK: [[SIVAR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SIVAR_REF_ADDR]]
|
||||
// BLOCKS: [[SIVAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[SIVAR_REF]],
|
||||
// BLOCKS: store i{{[0-9]+}} [[SIVAR_VAL]], i{{[0-9]+}}* [[SIVAR_PRIVATE_ADDR]],
|
||||
// BLOCKS: call {{.*}}void @__kmpc_barrier(
|
||||
// BLOCKS-NOT: call {{.*}}void @__kmpc_barrier(
|
||||
g = 1;
|
||||
sivar = 2;
|
||||
// BLOCKS: store i{{[0-9]+}} 1, i{{[0-9]+}}* [[G_PRIVATE_ADDR]],
|
||||
|
@ -244,9 +243,7 @@ int main() {
|
|||
// CHECK: call {{.*}} [[ST_TY_DEFAULT_CONSTR]]([[ST_TY]]* [[ST_TY_TEMP:%.+]])
|
||||
// CHECK: call {{.*}} [[S_INT_TY_COPY_CONSTR]]([[S_INT_TY]]* [[VAR_PRIV]], [[S_INT_TY]]* {{.*}} [[VAR_REF]], [[ST_TY]]* [[ST_TY_TEMP]])
|
||||
// CHECK: call {{.*}} [[ST_TY_DESTR]]([[ST_TY]]* [[ST_TY_TEMP]])
|
||||
// CHECK: [[GTID_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[GTID_ADDR_ADDR]]
|
||||
// CHECK: [[GTID:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[GTID_REF]]
|
||||
// CHECK: call {{.*}}void @__kmpc_barrier(%{{.+}}* [[IMPLICIT_BARRIER_LOC]], i{{[0-9]+}} [[GTID]])
|
||||
// CHECK-NOT: call {{.*}}void @__kmpc_barrier(
|
||||
// CHECK-DAG: call {{.*}} [[S_INT_TY_DESTR]]([[S_INT_TY]]* [[VAR_PRIV]])
|
||||
// CHECK-DAG: call {{.*}} [[S_INT_TY_DESTR]]([[S_INT_TY]]*
|
||||
// CHECK: ret void
|
||||
|
|
|
@ -59,7 +59,6 @@ S<float> s_arr[] = {1, 2};
|
|||
// CHECK-DAG: [[VAR:@.+]] = global [[S_FLOAT_TY]] zeroinitializer,
|
||||
S<float> var(3);
|
||||
// CHECK-DAG: [[SIVAR:@.+]] = internal global i{{[0-9]+}} 0,
|
||||
// CHECK-DAG: [[IMPLICIT_BARRIER_LOC:@.+]] = private unnamed_addr constant %{{.+}} { i32 0, i32 66, i32 0, i32 0, i8*
|
||||
// CHECK-DAG: [[SECTIONS_BARRIER_LOC:@.+]] = private unnamed_addr constant %{{.+}} { i32 0, i32 194, i32 0, i32 0, i8*
|
||||
|
||||
// CHECK: call {{.*}} [[S_FLOAT_TY_DEF_CONSTR:@.+]]([[S_FLOAT_TY]]* [[TEST]])
|
||||
|
@ -94,7 +93,7 @@ int main() {
|
|||
// LAMBDA: [[SIVAR1_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[SIVAR1_REF]]
|
||||
// LAMBDA: store i{{[0-9]+}} [[SIVAR1_VAL]], i{{[0-9]+}}* [[SIVAR1_PRIVATE_ADDR]]
|
||||
|
||||
// LAMBDA: call void @__kmpc_barrier(
|
||||
// LAMBDA-NOT: call void @__kmpc_barrier(
|
||||
{
|
||||
g = 1;
|
||||
sivar = 10;
|
||||
|
@ -154,7 +153,7 @@ int main() {
|
|||
|
||||
// BLOCKS: [[SIVAR1_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[SIVAR1_REF]],
|
||||
// BLOCKS: store i{{[0-9]+}} [[SIVAR1_VAL]], i{{[0-9]+}}* [[SIVAR1_PRIVATE_ADDR]],
|
||||
// BLOCKS: call void @__kmpc_barrier(
|
||||
// BLOCKS-NOT: call void @__kmpc_barrier(
|
||||
{
|
||||
g = 1;
|
||||
sivar = 10;
|
||||
|
@ -242,7 +241,7 @@ int main() {
|
|||
// CHECK: [[SIVAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[SIVAR]],
|
||||
// CHECK: store i{{[0-9]+}} [[SIVAR_VAL]], i{{[0-9]+}}* [[SIVAR_PRIV]],
|
||||
|
||||
// CHECK: call void @__kmpc_barrier(%{{.+}}* [[IMPLICIT_BARRIER_LOC]], i{{[0-9]+}} [[GTID]])
|
||||
// CHECK-NOT: call void @__kmpc_barrier(
|
||||
// CHECK: call void @__kmpc_for_static_init_4(
|
||||
// CHECK: call void @__kmpc_for_static_fini(
|
||||
|
||||
|
@ -304,10 +303,8 @@ int main() {
|
|||
// CHECK: call {{.*}} [[S_INT_TY_COPY_CONSTR]]([[S_INT_TY]]* [[VAR_PRIV]], [[S_INT_TY]]* {{.*}} [[VAR_REF]], [[ST_TY]]* [[ST_TY_TEMP]])
|
||||
// CHECK: call {{.*}} [[ST_TY_DESTR]]([[ST_TY]]* [[ST_TY_TEMP]])
|
||||
|
||||
// Synchronization for initialization.
|
||||
// CHECK: [[GTID_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[GTID_ADDR_ADDR]]
|
||||
// CHECK: [[GTID:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[GTID_REF]]
|
||||
// CHECK: call void @__kmpc_barrier(%{{.+}}* [[IMPLICIT_BARRIER_LOC]], i{{[0-9]+}} [[GTID]])
|
||||
// No synchronization for initialization.
|
||||
// CHECK-NOT: call void @__kmpc_barrier(
|
||||
|
||||
// CHECK: call void @__kmpc_for_static_init_4(
|
||||
// CHECK: call void @__kmpc_for_static_fini(
|
||||
|
|
|
@ -57,7 +57,6 @@ int vec[] = {1, 2};
|
|||
S<float> s_arr[] = {1, 2};
|
||||
// CHECK-DAG: [[VAR:@.+]] = global [[S_FLOAT_TY]] zeroinitializer,
|
||||
S<float> var(3);
|
||||
// CHECK-DAG: [[IMPLICIT_BARRIER_LOC:@.+]] = private unnamed_addr constant %{{.+}} { i32 0, i32 66, i32 0, i32 0, i8*
|
||||
// CHECK-DAG: [[SINGLE_BARRIER_LOC:@.+]] = private unnamed_addr constant %{{.+}} { i32 0, i32 322, i32 0, i32 0, i8*
|
||||
|
||||
// CHECK: call {{.*}} [[S_FLOAT_TY_DEF_CONSTR:@.+]]([[S_FLOAT_TY]]* [[TEST]])
|
||||
|
@ -215,7 +214,7 @@ int main() {
|
|||
// CHECK-DAG: call {{.*}} [[S_FLOAT_TY_DESTR]]([[S_FLOAT_TY]]*
|
||||
// CHECK: call void @__kmpc_end_single(
|
||||
|
||||
// CHECK: call void @__kmpc_barrier(%{{.+}}* [[IMPLICIT_BARRIER_LOC]], i{{[0-9]+}} [[GTID]])
|
||||
// CHECK-NOT: call void @__kmpc_barrier(
|
||||
|
||||
// CHECK: = call {{.*}}i{{.+}} [[TMAIN_INT:@.+]]()
|
||||
|
||||
|
|
Loading…
Reference in New Issue