forked from OSchip/llvm-project
[OPENMP]PR53344: Emit code for final update of the inscan reduction vars in worksharing loops.
The final values of inscan reduction variables must be written back to the original variables. For worksharing loops, the reduction values are stored in a temporary array, so the last element of that array needs to be copied to the original variable at the end of the construct. Differential Revision: https://reviews.llvm.org/D121156
This commit is contained in:
parent
17f3a92ee5
commit
1462e63f67
|
@ -3515,6 +3515,57 @@ static void emitScanBasedDirectiveDecls(
|
|||
}
|
||||
}
|
||||
|
||||
/// Copies the final inscan reduction values back to the original variables.
|
||||
/// For every variable listed in the directive's reduction clauses, the
/// emitted code is equivalent to the following:
|
||||
/// \code
|
||||
/// <orig_var> = buffer[num_iters-1];
|
||||
/// \endcode
///
/// \param CGF Code generation context used to emit the index computation and
/// the copies.
/// \param S Loop directive whose reduction clauses are processed; every
/// reduction clause on it is asserted to have the inscan modifier.
/// \param NumIteratorsGen Callback that emits the iteration count. Its result
/// is cast to CGF.SizeTy as an unsigned value before use.
|
||||
static void emitScanBasedDirectiveFinals(
|
||||
CodeGenFunction &CGF, const OMPLoopDirective &S,
|
||||
llvm::function_ref<llvm::Value *(CodeGenFunction &)> NumIteratorsGen) {
|
||||
// Emit the iteration count and normalize it to size_t (zero-extended).
llvm::Value *OMPScanNumIterations = CGF.Builder.CreateIntCast(
|
||||
NumIteratorsGen(CGF), CGF.SizeTy, /*isSigned=*/false);
|
||||
// Parallel lists: entry I of each list describes the same reduction
// variable, gathered across all reduction clauses of the directive.
SmallVector<const Expr *, 4> Shareds;
|
||||
SmallVector<const Expr *, 4> LHSs;
|
||||
SmallVector<const Expr *, 4> RHSs;
|
||||
SmallVector<const Expr *, 4> Privates;
|
||||
SmallVector<const Expr *, 4> CopyOps;
|
||||
SmallVector<const Expr *, 4> CopyArrayElems;
|
||||
for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
|
||||
assert(C->getModifier() == OMPC_REDUCTION_inscan &&
|
||||
"Only inscan reductions are expected.");
|
||||
Shareds.append(C->varlist_begin(), C->varlist_end());
|
||||
LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
|
||||
RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
|
||||
Privates.append(C->privates().begin(), C->privates().end());
|
||||
CopyOps.append(C->copy_ops().begin(), C->copy_ops().end());
|
||||
CopyArrayElems.append(C->copy_array_elems().begin(),
|
||||
C->copy_array_elems().end());
|
||||
}
|
||||
// Copy the last element of each temp buffer back to its original variable:
|
||||
// <orig_var> = TMP[LastIter], where LastIter = num_iters - 1.
// NOTE(review): nothing in this function guards against a zero iteration
// count, in which case LastIter would index before the start of the buffer.
// Confirm that this cannot happen or is handled elsewhere.
|
||||
llvm::Value *OMPLast = CGF.Builder.CreateNSWSub(
|
||||
OMPScanNumIterations,
|
||||
llvm::ConstantInt::get(CGF.SizeTy, 1, /*isSigned=*/false));
|
||||
for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
|
||||
const Expr *PrivateExpr = Privates[I];
|
||||
const Expr *OrigExpr = Shareds[I];
|
||||
const Expr *CopyArrayElem = CopyArrayElems[I];
|
||||
// CopyArrayElem is an array subscript whose index is an opaque value; bind
// that index to OMPLast while the source lvalue below is emitted.
CodeGenFunction::OpaqueValueMapping IdxMapping(
|
||||
CGF,
|
||||
cast<OpaqueValueExpr>(
|
||||
cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
|
||||
RValue::get(OMPLast));
|
||||
// Destination is the original variable; source is the buffer element at
// the index bound above.
LValue DestLVal = CGF.EmitLValue(OrigExpr);
|
||||
LValue SrcLVal = CGF.EmitLValue(CopyArrayElem);
|
||||
// Emit the copy using the clause's copy operation for this variable, with
// the LHS/RHS helper variables passed alongside it.
CGF.EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(CGF),
|
||||
SrcLVal.getAddress(CGF),
|
||||
cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
|
||||
cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()),
|
||||
CopyOps[I]);
|
||||
}
|
||||
}
|
||||
|
||||
/// Emits the code for the directive with inscan reductions.
|
||||
/// The code is the following:
|
||||
/// \code
|
||||
|
@ -3709,6 +3760,8 @@ static bool emitWorksharingDirective(CodeGenFunction &CGF,
|
|||
if (!isOpenMPParallelDirective(S.getDirectiveKind()))
|
||||
emitScanBasedDirectiveDecls(CGF, S, NumIteratorsGen);
|
||||
emitScanBasedDirective(CGF, S, NumIteratorsGen, FirstGen, SecondGen);
|
||||
if (!isOpenMPParallelDirective(S.getDirectiveKind()))
|
||||
emitScanBasedDirectiveFinals(CGF, S, NumIteratorsGen);
|
||||
} else {
|
||||
CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, S.getDirectiveKind(),
|
||||
HasCancel);
|
||||
|
@ -4282,23 +4335,25 @@ void CodeGenFunction::EmitOMPParallelForDirective(
|
|||
(void)emitWorksharingDirective(CGF, S, S.hasCancel());
|
||||
};
|
||||
{
|
||||
if (llvm::any_of(S.getClausesOfKind<OMPReductionClause>(),
|
||||
const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) {
|
||||
CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
|
||||
CGCapturedStmtInfo CGSI(CR_OpenMP);
|
||||
CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGSI);
|
||||
OMPLoopScope LoopScope(CGF, S);
|
||||
return CGF.EmitScalarExpr(S.getNumIterations());
|
||||
};
|
||||
bool IsInscan = llvm::any_of(S.getClausesOfKind<OMPReductionClause>(),
|
||||
[](const OMPReductionClause *C) {
|
||||
return C->getModifier() == OMPC_REDUCTION_inscan;
|
||||
})) {
|
||||
const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) {
|
||||
CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
|
||||
CGCapturedStmtInfo CGSI(CR_OpenMP);
|
||||
CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGSI);
|
||||
OMPLoopScope LoopScope(CGF, S);
|
||||
return CGF.EmitScalarExpr(S.getNumIterations());
|
||||
};
|
||||
});
|
||||
if (IsInscan)
|
||||
emitScanBasedDirectiveDecls(*this, S, NumIteratorsGen);
|
||||
}
|
||||
auto LPCRegion =
|
||||
CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
|
||||
emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen,
|
||||
emitEmptyBoundParameters);
|
||||
if (IsInscan)
|
||||
emitScanBasedDirectiveFinals(*this, S, NumIteratorsGen);
|
||||
}
|
||||
// Check for outer lastprivate conditional update.
|
||||
checkForLastprivateConditionalUpdate(*this, S);
|
||||
|
@ -4313,23 +4368,25 @@ void CodeGenFunction::EmitOMPParallelForSimdDirective(
|
|||
(void)emitWorksharingDirective(CGF, S, /*HasCancel=*/false);
|
||||
};
|
||||
{
|
||||
if (llvm::any_of(S.getClausesOfKind<OMPReductionClause>(),
|
||||
const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) {
|
||||
CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
|
||||
CGCapturedStmtInfo CGSI(CR_OpenMP);
|
||||
CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGSI);
|
||||
OMPLoopScope LoopScope(CGF, S);
|
||||
return CGF.EmitScalarExpr(S.getNumIterations());
|
||||
};
|
||||
bool IsInscan = llvm::any_of(S.getClausesOfKind<OMPReductionClause>(),
|
||||
[](const OMPReductionClause *C) {
|
||||
return C->getModifier() == OMPC_REDUCTION_inscan;
|
||||
})) {
|
||||
const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) {
|
||||
CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
|
||||
CGCapturedStmtInfo CGSI(CR_OpenMP);
|
||||
CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGSI);
|
||||
OMPLoopScope LoopScope(CGF, S);
|
||||
return CGF.EmitScalarExpr(S.getNumIterations());
|
||||
};
|
||||
});
|
||||
if (IsInscan)
|
||||
emitScanBasedDirectiveDecls(*this, S, NumIteratorsGen);
|
||||
}
|
||||
auto LPCRegion =
|
||||
CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
|
||||
emitCommonOMPParallelDirective(*this, S, OMPD_for_simd, CodeGen,
|
||||
emitEmptyBoundParameters);
|
||||
if (IsInscan)
|
||||
emitScanBasedDirectiveFinals(*this, S, NumIteratorsGen);
|
||||
}
|
||||
// Check for outer lastprivate conditional update.
|
||||
checkForLastprivateConditionalUpdate(*this, S);
|
||||
|
|
|
@ -27,6 +27,13 @@ void baz(int n) {
|
|||
// CHECK: [[B_BUF:%.+]] = alloca double, i64 10,
|
||||
|
||||
// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(
|
||||
// CHECK: [[LAST:%.+]] = mul nsw i64 9, %
|
||||
// CHECK: [[LAST_REF:%.+]] = getelementptr inbounds float, float* [[A_BUF]], i64 [[LAST]]
|
||||
// CHECK: [[BC:%.+]] = bitcast float* [[LAST_REF]] to i8*
|
||||
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 bitcast ([10 x float]* @_ZZ3baziE1a to i8*), i8* align 4 [[BC]], i64 %{{.+}}, i1 false)
|
||||
// CHECK: [[LAST_REF_B:%.+]] = getelementptr inbounds double, double* [[B_BUF]], i64 9
|
||||
// CHECK: [[LAST_VAL:%.+]] = load double, double* [[LAST_REF_B]],
|
||||
// CHECK: store double [[LAST_VAL]], double* @_ZZ3baziE1b,
|
||||
|
||||
// CHECK: [[A_BUF_SIZE:%.+]] = mul nuw i64 10, [[NUM_ELEMS:%[^,]+]]
|
||||
|
||||
|
|
Loading…
Reference in New Issue