forked from OSchip/llvm-project
[Clang][OpenMP] Fix the issue that `llvm.lifetime.end` is emitted too early for variables captured in linear clause
Currently if an OpenMP program uses `linear` clause, and is compiled with optimization, `llvm.lifetime.end` for variables listed in `linear` clause are emitted too early such that there could still be uses after that. Let's take the following code as example: ``` // loop.c int j; int *u; void loop(int n) { int i; for (i = 0; i < n; ++i) { ++j; u = &j; } } ``` We compile using the command: ``` clang -cc1 -fopenmp-simd -O3 -x c -triple x86_64-apple-darwin10 -emit-llvm loop.c -o loop.ll ``` The following IR (simplified) will be generated: ``` @j = local_unnamed_addr global i32 0, align 4 @u = local_unnamed_addr global ptr null, align 8 define void @loop(i32 noundef %n) local_unnamed_addr { entry: %j = alloca i32, align 4 %cmp = icmp sgt i32 %n, 0 br i1 %cmp, label %simd.if.then, label %simd.if.end simd.if.then: ; preds = %entry call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %j) store ptr %j, ptr @u, align 8 call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %j) %0 = load i32, ptr %j, align 4 store i32 %0, ptr @j, align 4 br label %simd.if.end simd.if.end: ; preds = %simd.if.then, %entry ret void } ``` The most important part is: ``` call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %j) %0 = load i32, ptr %j, align 4 store i32 %0, ptr @j, align 4 ``` `%j` is still loaded after `@llvm.lifetime.end.p0(i64 4, ptr nonnull %j)`. This could cause the backend incorrectly optimizes the code and further generates incorrect code. The root cause is, when we emit a construct that could have `linear` clause, it usually has the following pattern: ``` EmitOMPLinearClauseInit(S) { OMPPrivateScope LoopScope(*this); ... EmitOMPLinearClause(S, LoopScope); ... (void)LoopScope.Privatize(); ... } EmitOMPLinearClauseFinal(S, [](CodeGenFunction &) { return nullptr; }); ``` Variables that need to be privatized are added into `LoopScope`, which also serves as a RAII object. When `LoopScope` is destructed and if optimization is enabled, a `@llvm.lifetime.end` is also emitted for each privatized variable. However, the writing back to original variables in `linear` clause happens after the scope in `EmitOMPLinearClauseFinal`, causing the issue we see above. A quick "fix" seems to be, moving `EmitOMPLinearClauseFinal` inside the scope. However, it doesn't work. That's because the local variable map has been updated by `LoopScope` such that a variable declaration is mapped to the privatized variable, instead of the actual one. In that way, the following code will be generated: ``` %0 = load i32, ptr %j, align 4 store i32 %0, ptr %j, align 4 call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %j) ``` Well, now the life time is correct, but apparently the writing back is broken. In this patch, a new function `OMPPrivateScope::restoreMap` is added and called before calling `EmitOMPLinearClauseFinal`. This can make sure that `EmitOMPLinearClauseFinal` can find the orignal varaibls to write back. Fixes #56913. Reviewed By: ABataev Differential Revision: https://reviews.llvm.org/D131272
This commit is contained in:
parent
91f3f0bf31
commit
e21202dac1
|
@ -2582,8 +2582,9 @@ static void emitOMPSimdRegion(CodeGenFunction &CGF, const OMPLoopDirective &S,
|
|||
CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_simd);
|
||||
emitPostUpdateForReductionClause(CGF, S,
|
||||
[](CodeGenFunction &) { return nullptr; });
|
||||
LoopScope.restoreMap();
|
||||
CGF.EmitOMPLinearClauseFinal(S, [](CodeGenFunction &) { return nullptr; });
|
||||
}
|
||||
CGF.EmitOMPLinearClauseFinal(S, [](CodeGenFunction &) { return nullptr; });
|
||||
// Emit: if (PreCond) - end.
|
||||
if (ContBlock) {
|
||||
CGF.EmitBranch(ContBlock);
|
||||
|
@ -3428,11 +3429,12 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(
|
|||
EmitOMPLastprivateClauseFinal(
|
||||
S, isOpenMPSimdDirective(S.getDirectiveKind()),
|
||||
Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getBeginLoc())));
|
||||
LoopScope.restoreMap();
|
||||
EmitOMPLinearClauseFinal(S, [IL, &S](CodeGenFunction &CGF) {
|
||||
return CGF.Builder.CreateIsNotNull(
|
||||
CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
|
||||
});
|
||||
}
|
||||
EmitOMPLinearClauseFinal(S, [IL, &S](CodeGenFunction &CGF) {
|
||||
return CGF.Builder.CreateIsNotNull(
|
||||
CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
|
||||
});
|
||||
DoacrossCleanupScope.ForceCleanup();
|
||||
// We're now done with the loop, so jump to the continuation block.
|
||||
if (ContBlock) {
|
||||
|
@ -7660,6 +7662,7 @@ void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {
|
|||
CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false,
|
||||
(*LIP)->getType(), S.getBeginLoc())));
|
||||
}
|
||||
LoopScope.restoreMap();
|
||||
CGF.EmitOMPLinearClauseFinal(S, [LIP, &S](CodeGenFunction &CGF) {
|
||||
return CGF.Builder.CreateIsNotNull(
|
||||
CGF.EmitLoadOfScalar(CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false,
|
||||
|
|
|
@ -1094,7 +1094,7 @@ public:
|
|||
|
||||
void ForceCleanup() {
|
||||
RunCleanupsScope::ForceCleanup();
|
||||
MappedVars.restore(CGF);
|
||||
restoreMap();
|
||||
}
|
||||
|
||||
/// Exit scope - all the mapped variables are restored.
|
||||
|
@ -1108,6 +1108,11 @@ public:
|
|||
VD = VD->getCanonicalDecl();
|
||||
return !VD->isLocalVarDeclOrParm() && CGF.LocalDeclMap.count(VD) > 0;
|
||||
}
|
||||
|
||||
/// Restore all mapped variables w/o clean up. This is usefully when we want
|
||||
/// to reference the original variables but don't want the clean up because
|
||||
/// that could emit lifetime end too early, causing backend issue #56913.
|
||||
void restoreMap() { MappedVars.restore(CGF); }
|
||||
};
|
||||
|
||||
/// Save/restore original map of previously emitted local vars in case when we
|
||||
|
|
|
@ -0,0 +1,32 @@
|
|||
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --prefix-filecheck-ir-name _
|
||||
// RUN: %clang_cc1 -fopenmp-simd -O1 -x c -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK
|
||||
|
||||
int j;
|
||||
int *u;
|
||||
|
||||
void loop(int n) {
|
||||
int i;
|
||||
#pragma omp parallel master taskloop simd linear(j)
|
||||
for (i = 0; i < n; ++i) {
|
||||
++j;
|
||||
u = &j;
|
||||
}
|
||||
}
|
||||
// CHECK-LABEL: define {{[^@]+}}@loop
|
||||
// CHECK-SAME: (i32 noundef [[N:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[J:%.*]] = alloca i32, align 4
|
||||
// CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[N]], 0
|
||||
// CHECK-NEXT: br i1 [[CMP]], label [[SIMD_IF_THEN:%.*]], label [[SIMD_IF_END:%.*]]
|
||||
// CHECK: simd.if.then:
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr @j, align 4, !tbaa [[TBAA2:![0-9]+]]
|
||||
// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[J]]) #[[ATTR2:[0-9]+]]
|
||||
// CHECK-NEXT: store ptr [[J]], ptr @u, align 8, !tbaa [[TBAA6:![0-9]+]], !llvm.access.group [[ACC_GRP8:![0-9]+]]
|
||||
// CHECK-NEXT: [[INC_LE:%.*]] = add i32 [[TMP0]], [[N]]
|
||||
// CHECK-NEXT: store i32 [[INC_LE]], ptr [[J]], align 4, !tbaa [[TBAA2]]
|
||||
// CHECK-NEXT: store i32 [[INC_LE]], ptr @j, align 4, !tbaa [[TBAA2]]
|
||||
// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[J]]) #[[ATTR2]]
|
||||
// CHECK-NEXT: br label [[SIMD_IF_END]]
|
||||
// CHECK: simd.if.end:
|
||||
// CHECK-NEXT: ret void
|
||||
//
|
|
@ -322,18 +322,18 @@ int main() {
|
|||
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
|
||||
// CHECK1: omp.loop.exit:
|
||||
// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP5]])
|
||||
// CHECK1-NEXT: [[TMP19:%.*]] = bitcast i64* [[DOTLVAR__ADDR]] to i8*
|
||||
// CHECK1-NEXT: call void @__kmpc_free(i32 [[TMP5]], i8* [[TMP19]], i8* inttoptr (i64 5 to i8*))
|
||||
// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4
|
||||
// CHECK1-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0
|
||||
// CHECK1-NEXT: br i1 [[TMP21]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]]
|
||||
// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4
|
||||
// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0
|
||||
// CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]]
|
||||
// CHECK1: .omp.linear.pu:
|
||||
// CHECK1-NEXT: [[TMP22:%.*]] = load float*, float** [[PVAR2]], align 8
|
||||
// CHECK1-NEXT: store float* [[TMP22]], float** [[TMP0]], align 8
|
||||
// CHECK1-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTLVAR__ADDR]], align 8
|
||||
// CHECK1-NEXT: store i64 [[TMP23]], i64* [[TMP1]], align 8
|
||||
// CHECK1-NEXT: [[TMP21:%.*]] = load float*, float** [[PVAR2]], align 8
|
||||
// CHECK1-NEXT: store float* [[TMP21]], float** [[TMP0]], align 8
|
||||
// CHECK1-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTLVAR__ADDR]], align 8
|
||||
// CHECK1-NEXT: store i64 [[TMP22]], i64* [[TMP1]], align 8
|
||||
// CHECK1-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]]
|
||||
// CHECK1: .omp.linear.pu.done:
|
||||
// CHECK1-NEXT: [[TMP23:%.*]] = bitcast i64* [[DOTLVAR__ADDR]] to i8*
|
||||
// CHECK1-NEXT: call void @__kmpc_free(i32 [[TMP5]], i8* [[TMP23]], i8* inttoptr (i64 5 to i8*))
|
||||
// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]])
|
||||
// CHECK1-NEXT: ret void
|
||||
//
|
||||
|
|
|
@ -1847,8 +1847,12 @@ void loop() {
|
|||
// CHECK6-NEXT: [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0
|
||||
// CHECK6-NEXT: br i1 [[TMP38]], label [[DOTOMP_LINEAR_PU_I:%.*]], label [[DOTOMP_OUTLINED__1_EXIT:%.*]]
|
||||
// CHECK6: .omp.linear.pu.i:
|
||||
// CHECK6-NEXT: [[TMP39:%.*]] = load i32, i32* [[J_I]], align 4, !noalias !14
|
||||
// CHECK6-NEXT: store i32 [[TMP39]], i32* [[J_I]], align 4, !noalias !14
|
||||
// CHECK6-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP20]], i32 0, i32 0
|
||||
// CHECK6-NEXT: [[TMP40:%.*]] = load i32*, i32** [[TMP39]], align 8
|
||||
// CHECK6-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP20]], i32 0, i32 1
|
||||
// CHECK6-NEXT: [[TMP42:%.*]] = load i32*, i32** [[TMP41]], align 8
|
||||
// CHECK6-NEXT: [[TMP43:%.*]] = load i32, i32* [[J_I]], align 4, !noalias !14
|
||||
// CHECK6-NEXT: store i32 [[TMP43]], i32* [[TMP42]], align 4
|
||||
// CHECK6-NEXT: br label [[DOTOMP_OUTLINED__1_EXIT]]
|
||||
// CHECK6: .omp_outlined..1.exit:
|
||||
// CHECK6-NEXT: ret i32 0
|
||||
|
|
Loading…
Reference in New Issue