[Clang] [P2025] Analyze only potential scopes for NRVO

Before the patch we calculated the NRVO candidate looking at the
variable's whole enclosing scope. The research in [P2025] shows that
looking at the variable's potential scope is better and covers more
cases where NRVO would be safe and desirable.

Many thanks to @Izaron for the original implementation.

Reviewed By: ChuanqiXu

Differential Revision: https://reviews.llvm.org/D119792
This commit is contained in:
Roman Rusyaev 2022-07-26 18:56:04 +08:00 committed by Chuanqi Xu
parent bf759e3b10
commit fec5ff2a32
6 changed files with 641 additions and 722 deletions

View File

@ -494,6 +494,9 @@ C++ Language Changes in Clang
unsigned character literals. This fixes `Issue 54886 <https://github.com/llvm/llvm-project/issues/54886>`_.
- Stopped allowing constraints on non-template functions to be compliant with
dcl.decl.general p4.
- Improved ``copy elision`` optimization. It's possible to apply ``NRVO`` for an object if at the moment when
any return statement of this object is executed, the ``return slot`` won't be occupied by another object.
C++20 Feature Support
^^^^^^^^^^^^^^^^^^^^^

View File

@ -210,9 +210,19 @@ private:
/// Used to determine if errors occurred in this scope.
DiagnosticErrorTrap ErrorTrap;
/// A lattice consisting of undefined, a single NRVO candidate variable in
/// this scope, or over-defined. The bit is true when over-defined.
llvm::PointerIntPair<VarDecl *, 1, bool> NRVO;
/// A single NRVO candidate variable in this scope.
/// There are three possible values:
/// 1) pointer to VarDecl that denotes NRVO candidate itself.
/// 2) nullptr value means that NRVO is not allowed in this scope
/// (e.g. return a function parameter).
/// 3) None value means that there is no NRVO candidate in this scope
/// (i.e. there are no return statements in this scope).
Optional<VarDecl *> NRVO;
/// Represents return slots for NRVO candidates in the current scope.
/// If a variable is present in this set, it means that a return slot is
/// available for this variable in the current scope.
llvm::SmallPtrSet<VarDecl *, 8> ReturnSlots;
void setFlags(Scope *Parent, unsigned F);
@ -304,6 +314,10 @@ public:
bool decl_empty() const { return DeclsInScope.empty(); }
void AddDecl(Decl *D) {
if (auto *VD = dyn_cast<VarDecl>(D))
if (!isa<ParmVarDecl>(VD))
ReturnSlots.insert(VD);
DeclsInScope.insert(D);
}
@ -527,23 +541,9 @@ public:
UsingDirectives.end());
}
void addNRVOCandidate(VarDecl *VD) {
if (NRVO.getInt())
return;
if (NRVO.getPointer() == nullptr) {
NRVO.setPointer(VD);
return;
}
if (NRVO.getPointer() != VD)
setNoNRVO();
}
void updateNRVOCandidate(VarDecl *VD);
void setNoNRVO() {
NRVO.setInt(true);
NRVO.setPointer(nullptr);
}
void mergeNRVOIntoParent();
void applyNRVO();
/// Init - This is used by the parser to implement scope caching.
void Init(Scope *parent, unsigned flags);

View File

@ -91,7 +91,7 @@ void Scope::Init(Scope *parent, unsigned flags) {
UsingDirectives.clear();
Entity = nullptr;
ErrorTrap.reset();
NRVO.setPointerAndInt(nullptr, false);
NRVO = None;
}
bool Scope::containedInPrototypeScope() const {
@ -118,19 +118,71 @@ void Scope::AddFlags(unsigned FlagsToSet) {
Flags |= FlagsToSet;
}
void Scope::mergeNRVOIntoParent() {
if (VarDecl *Candidate = NRVO.getPointer()) {
if (isDeclScope(Candidate))
Candidate->setNRVOVariable(true);
// The algorithm for updating NRVO candidate is as follows:
// 1. All previous candidates become invalid because a new NRVO candidate is
// obtained. Therefore, we need to clear return slots for other
// variables defined before the current return statement in the current
// scope and in outer scopes.
// 2. Store the new candidate if its return slot is available. Otherwise,
// there is no NRVO candidate so far.
void Scope::updateNRVOCandidate(VarDecl *VD) {
auto UpdateReturnSlotsInScopeForVD = [VD](Scope *S) -> bool {
bool IsReturnSlotFound = S->ReturnSlots.contains(VD);
// We found a candidate variable that can be put into a return slot.
// Clear the set, because other variables cannot occupy a return
// slot in the same scope.
S->ReturnSlots.clear();
if (IsReturnSlotFound)
S->ReturnSlots.insert(VD);
return IsReturnSlotFound;
};
bool CanBePutInReturnSlot = false;
for (auto *S = this; S; S = S->getParent()) {
CanBePutInReturnSlot |= UpdateReturnSlotsInScopeForVD(S);
if (S->getEntity())
break;
}
if (getEntity())
// Consider the variable as NRVO candidate if the return slot is available
// for it in the current scope, or if it can be available in outer scopes.
NRVO = CanBePutInReturnSlot ? VD : nullptr;
}
void Scope::applyNRVO() {
// There is no NRVO candidate in the current scope.
if (!NRVO.hasValue())
return;
if (NRVO.getInt())
getParent()->setNoNRVO();
else if (NRVO.getPointer())
getParent()->addNRVOCandidate(NRVO.getPointer());
if (*NRVO && isDeclScope(*NRVO))
NRVO.getValue()->setNRVOVariable(true);
// It's necessary to propagate NRVO candidate to the parent scope for cases
// when the parent scope doesn't contain a return statement.
// For example:
// X foo(bool b) {
// X x;
// if (b)
// return x;
// exit(0);
// }
// Also, we need to propagate nullptr value that means NRVO is not
// allowed in this scope.
// For example:
// X foo(bool b) {
// X x;
// if (b)
// return x;
// else
// return X(); // NRVO is not allowed
// }
if (!getEntity())
getParent()->NRVO = *NRVO;
}
LLVM_DUMP_METHOD void Scope::dump() const { dumpImpl(llvm::errs()); }
@ -193,8 +245,10 @@ void Scope::dumpImpl(raw_ostream &OS) const {
if (const DeclContext *DC = getEntity())
OS << "Entity : (clang::DeclContext*)" << DC << '\n';
if (NRVO.getInt())
OS << "NRVO not allowed\n";
else if (NRVO.getPointer())
OS << "NRVO candidate : (clang::VarDecl*)" << NRVO.getPointer() << '\n';
if (!NRVO)
OS << "there is no NRVO candidate\n";
else if (*NRVO)
OS << "NRVO candidate : (clang::VarDecl*)" << *NRVO << '\n';
else
OS << "NRVO is not allowed\n";
}

View File

@ -2092,7 +2092,7 @@ static void CheckPoppedLabel(LabelDecl *L, Sema &S) {
}
void Sema::ActOnPopScope(SourceLocation Loc, Scope *S) {
S->mergeNRVOIntoParent();
S->applyNRVO();
if (S->decl_empty()) return;
assert((S->getFlags() & (Scope::DeclScope | Scope::TemplateParamScope)) &&

View File

@ -3898,12 +3898,10 @@ Sema::ActOnReturnStmt(SourceLocation ReturnLoc, Expr *RetValExp,
if (R.isInvalid() || ExprEvalContexts.back().isDiscardedStatementContext())
return R;
if (VarDecl *VD =
const_cast<VarDecl*>(cast<ReturnStmt>(R.get())->getNRVOCandidate())) {
CurScope->addNRVOCandidate(VD);
} else {
CurScope->setNoNRVO();
}
VarDecl *VD =
const_cast<VarDecl *>(cast<ReturnStmt>(R.get())->getNRVOCandidate());
CurScope->updateNRVOCandidate(VD);
CheckJumpOutOfSEHFinally(*this, ReturnLoc, *CurScope->getFnParent());

File diff suppressed because it is too large Load Diff