llvm-project/clang/lib/Lex/MacroInfo.cpp

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

269 lines
9.0 KiB
C++
Raw Normal View History

//===- MacroInfo.cpp - Information about #defined identifiers -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the MacroInfo interface.
//
//===----------------------------------------------------------------------===//
#include "clang/Lex/MacroInfo.h"
#include "clang/Basic/IdentifierTable.h"
#include "clang/Basic/LLVM.h"
#include "clang/Basic/SourceLocation.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Basic/TokenKinds.h"
#include "clang/Lex/Preprocessor.h"
#include "clang/Lex/Token.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <utility>
using namespace clang;
[Preprocessor] Reduce the memory overhead of `#define` directives (Recommit) Recently we observed high memory pressure caused by clang during some parallel builds. We discovered that we have several projects that have a large number of #define directives in their TUs (on the order of millions), which caused huge memory consumption in clang due to a lot of allocations for MacroInfo. We would like to reduce the memory overhead of clang for a single #define to reduce the memory overhead for these files, to allow us to reduce the memory pressure on the system during highly parallel builds. This change achieves that by removing the SmallVector in MacroInfo and instead storing the tokens in an array allocated using the bump pointer allocator, after all tokens are lexed. The added unit test with 1000000 #define directives illustrates the problem. Prior to this change, on arm64 macOS, clang's PP bump pointer allocator allocated 272007616 bytes, and used roughly 272 bytes per #define. After this change, clang's PP bump pointer allocator allocates 120002016 bytes, and uses only roughly 120 bytes per #define. For an example test file that we have internally with 7.8 million #define directives, this change produces the following improvement on arm64 macOS: Persistent allocation footprint for this test case file as it's being compiled to LLVM IR went down 22% from 5.28 GB to 4.07 GB and the total allocations went down 14% from 8.26 GB to 7.05 GB. Furthermore, this change reduced the total number of allocations made by the system for this clang invocation from 1454853 to 133663, an order of magnitude improvement. The recommit fixes the LLDB build failure. Differential Revision: https://reviews.llvm.org/D117348
2022-02-12 05:50:30 +08:00
namespace {

// MacroInfo is expected to take 40 bytes on platforms with an 8 byte pointer
// and 4 byte SourceLocation.
//
// The primary template accepts any pointer width and always reports success;
// only the specialization for 8-byte pointers compares sizeof(MacroInfo)
// against the expected value, so the static_assert below can only fire on
// targets where sizeof(void *) == 8.
template <int PointerSize> struct MacroInfoSizeChecker {
  static constexpr bool AsExpected = true;
};

template <> struct MacroInfoSizeChecker<8> {
  static constexpr bool AsExpected =
      sizeof(MacroInfo) == (32 + sizeof(SourceLocation) * 2);
};

static_assert(MacroInfoSizeChecker<sizeof(void *)>::AsExpected,
              "Unexpected size of MacroInfo");

} // end namespace
Keep history of macro definitions and #undefs Summary: Summary: Keep history of macro definitions and #undefs with corresponding source locations, so that we can later find out all macros active in a specified source location. We don't save the history in PCH (no need currently). Memory overhead is about sizeof(void*)*3*<number of macro definitions and #undefs>+<in-memory size of all #undef'd macros> I've run a test on a file composed of 109 .h files from boost 1.49 on x86-64 linux. Stats before this patch: *** Preprocessor Stats: 73222 directives found: 19171 #define. 4345 #undef. #include/#include_next/#import: 5233 source files entered. 27 max include stack depth 19210 #if/#ifndef/#ifdef. 2384 #else/#elif. 6891 #endif. 408 #pragma. 14466 #if/#ifndef#ifdef regions skipped 80023/451669/1270 obj/fn/builtin macros expanded, 85724 on the fast path. 127145 token paste (##) operations performed, 11008 on the fast path. Preprocessor Memory: 5874615B total BumpPtr: 4399104 Macro Expanded Tokens: 417768 Predefines Buffer: 8135 Macros: 1048576 #pragma push_macro Info: 0 Poison Reasons: 1024 Comment Handlers: 8 Stats with this patch: ... Preprocessor Memory: 7541687B total BumpPtr: 6066176 Macro Expanded Tokens: 417768 Predefines Buffer: 8135 Macros: 1048576 #pragma push_macro Info: 0 Poison Reasons: 1024 Comment Handlers: 8 In my test increase in memory usage is about 1.7Mb, which is ~28% of initial preprocessor's memory usage and about 0.8% of clang's total VMM allocation. As for CPU overhead, it should only be noticeable when iterating over all macros, and should mostly consist of couple extra dereferences and one comparison per macro + skipping of #undef'd macros. It's less trivial to measure, though, as the preprocessor consumes a very small fraction of compilation time. Reviewers: doug.gregor, klimek, rsmith, djasper Reviewed By: doug.gregor CC: cfe-commits, chandlerc Differential Revision: http://llvm-reviews.chandlerc.com/D28 llvm-svn: 162810
2012-08-29 08:20:03 +08:00
/// Create a MacroInfo whose definition location is \p DefLoc. Every boolean
/// flag listed below starts out cleared.
MacroInfo::MacroInfo(SourceLocation DefLoc)
    : Location(DefLoc),
      IsDefinitionLengthCached(false),
      IsFunctionLike(false),
      IsC99Varargs(false),
      IsGNUVarargs(false),
      IsBuiltinMacro(false),
      HasCommaPasting(false),
      IsDisabled(false),
      IsUsed(false),
      IsAllowRedefinitionsWithoutWarning(false),
      IsWarnIfUnused(false),
      UsedForHeaderGuard(false) {
}
unsigned MacroInfo::getDefinitionLengthSlow(const SourceManager &SM) const {
assert(!IsDefinitionLengthCached);
IsDefinitionLengthCached = true;
[Preprocessor] Reduce the memory overhead of `#define` directives (Recommit) Recently we observed high memory pressure caused by clang during some parallel builds. We discovered that we have several projects that have a large number of #define directives in their TUs (on the order of millions), which caused huge memory consumption in clang due to a lot of allocations for MacroInfo. We would like to reduce the memory overhead of clang for a single #define to reduce the memory overhead for these files, to allow us to reduce the memory pressure on the system during highly parallel builds. This change achieves that by removing the SmallVector in MacroInfo and instead storing the tokens in an array allocated using the bump pointer allocator, after all tokens are lexed. The added unit test with 1000000 #define directives illustrates the problem. Prior to this change, on arm64 macOS, clang's PP bump pointer allocator allocated 272007616 bytes, and used roughly 272 bytes per #define. After this change, clang's PP bump pointer allocator allocates 120002016 bytes, and uses only roughly 120 bytes per #define. For an example test file that we have internally with 7.8 million #define directives, this change produces the following improvement on arm64 macOS: Persistent allocation footprint for this test case file as it's being compiled to LLVM IR went down 22% from 5.28 GB to 4.07 GB and the total allocations went down 14% from 8.26 GB to 7.05 GB. Furthermore, this change reduced the total number of allocations made by the system for this clang invocation from 1454853 to 133663, an order of magnitude improvement. The recommit fixes the LLDB build failure. Differential Revision: https://reviews.llvm.org/D117348
2022-02-12 05:50:30 +08:00
ArrayRef<Token> ReplacementTokens = tokens();
if (ReplacementTokens.empty())
return (DefinitionLength = 0);
const Token &firstToken = ReplacementTokens.front();
const Token &lastToken = ReplacementTokens.back();
SourceLocation macroStart = firstToken.getLocation();
SourceLocation macroEnd = lastToken.getLocation();
assert(macroStart.isValid() && macroEnd.isValid());
assert((macroStart.isFileID() || firstToken.is(tok::comment)) &&
"Macro defined in macro?");
assert((macroEnd.isFileID() || lastToken.is(tok::comment)) &&
"Macro defined in macro?");
std::pair<FileID, unsigned>
startInfo = SM.getDecomposedExpansionLoc(macroStart);
std::pair<FileID, unsigned>
endInfo = SM.getDecomposedExpansionLoc(macroEnd);
assert(startInfo.first == endInfo.first &&
"Macro definition spanning multiple FileIDs ?");
assert(startInfo.second <= endInfo.second);
DefinitionLength = endInfo.second - startInfo.second;
DefinitionLength += lastToken.getLength();
return DefinitionLength;
}
/// Return true if the specified macro definition is equal to
/// this macro in spelling, arguments, and whitespace.
///
/// \param Syntactically if true, the macro definitions can be identical even
/// if they use different identifiers for the function macro parameters.
/// Otherwise the comparison is lexical and this implements the rules in
/// C99 6.10.3.
bool MacroInfo::isIdenticalTo(const MacroInfo &Other, Preprocessor &PP,
bool Syntactically) const {
bool Lexically = !Syntactically;
// Check # tokens in replacement, number of args, and various flags all match.
[Preprocessor] Reduce the memory overhead of `#define` directives (Recommit) Recently we observed high memory pressure caused by clang during some parallel builds. We discovered that we have several projects that have a large number of #define directives in their TUs (on the order of millions), which caused huge memory consumption in clang due to a lot of allocations for MacroInfo. We would like to reduce the memory overhead of clang for a single #define to reduce the memory overhead for these files, to allow us to reduce the memory pressure on the system during highly parallel builds. This change achieves that by removing the SmallVector in MacroInfo and instead storing the tokens in an array allocated using the bump pointer allocator, after all tokens are lexed. The added unit test with 1000000 #define directives illustrates the problem. Prior to this change, on arm64 macOS, clang's PP bump pointer allocator allocated 272007616 bytes, and used roughly 272 bytes per #define. After this change, clang's PP bump pointer allocator allocates 120002016 bytes, and uses only roughly 120 bytes per #define. For an example test file that we have internally with 7.8 million #define directives, this change produces the following improvement on arm64 macOS: Persistent allocation footprint for this test case file as it's being compiled to LLVM IR went down 22% from 5.28 GB to 4.07 GB and the total allocations went down 14% from 8.26 GB to 7.05 GB. Furthermore, this change reduced the total number of allocations made by the system for this clang invocation from 1454853 to 133663, an order of magnitude improvement. The recommit fixes the LLDB build failure. Differential Revision: https://reviews.llvm.org/D117348
2022-02-12 05:50:30 +08:00
if (getNumTokens() != Other.getNumTokens() ||
getNumParams() != Other.getNumParams() ||
isFunctionLike() != Other.isFunctionLike() ||
isC99Varargs() != Other.isC99Varargs() ||
isGNUVarargs() != Other.isGNUVarargs())
return false;
if (Lexically) {
// Check arguments.
for (param_iterator I = param_begin(), OI = Other.param_begin(),
E = param_end();
I != E; ++I, ++OI)
if (*I != *OI) return false;
}
// Check all the tokens.
[Preprocessor] Reduce the memory overhead of `#define` directives (Recommit) Recently we observed high memory pressure caused by clang during some parallel builds. We discovered that we have several projects that have a large number of #define directives in their TUs (on the order of millions), which caused huge memory consumption in clang due to a lot of allocations for MacroInfo. We would like to reduce the memory overhead of clang for a single #define to reduce the memory overhead for these files, to allow us to reduce the memory pressure on the system during highly parallel builds. This change achieves that by removing the SmallVector in MacroInfo and instead storing the tokens in an array allocated using the bump pointer allocator, after all tokens are lexed. The added unit test with 1000000 #define directives illustrates the problem. Prior to this change, on arm64 macOS, clang's PP bump pointer allocator allocated 272007616 bytes, and used roughly 272 bytes per #define. After this change, clang's PP bump pointer allocator allocates 120002016 bytes, and uses only roughly 120 bytes per #define. For an example test file that we have internally with 7.8 million #define directives, this change produces the following improvement on arm64 macOS: Persistent allocation footprint for this test case file as it's being compiled to LLVM IR went down 22% from 5.28 GB to 4.07 GB and the total allocations went down 14% from 8.26 GB to 7.05 GB. Furthermore, this change reduced the total number of allocations made by the system for this clang invocation from 1454853 to 133663, an order of magnitude improvement. The recommit fixes the LLDB build failure. Differential Revision: https://reviews.llvm.org/D117348
2022-02-12 05:50:30 +08:00
for (unsigned i = 0; i != NumReplacementTokens; ++i) {
const Token &A = ReplacementTokens[i];
const Token &B = Other.ReplacementTokens[i];
if (A.getKind() != B.getKind())
return false;
// If this isn't the first first token, check that the whitespace and
// start-of-line characteristics match.
if (i != 0 &&
(A.isAtStartOfLine() != B.isAtStartOfLine() ||
A.hasLeadingSpace() != B.hasLeadingSpace()))
return false;
// If this is an identifier, it is easy.
if (A.getIdentifierInfo() || B.getIdentifierInfo()) {
if (A.getIdentifierInfo() == B.getIdentifierInfo())
continue;
if (Lexically)
return false;
// With syntactic equivalence the parameter names can be different as long
// as they are used in the same place.
int AArgNum = getParameterNum(A.getIdentifierInfo());
if (AArgNum == -1)
return false;
if (AArgNum != Other.getParameterNum(B.getIdentifierInfo()))
return false;
continue;
}
// Otherwise, check the spelling.
if (PP.getSpelling(A) != PP.getSpelling(B))
return false;
}
return true;
}
LLVM_DUMP_METHOD void MacroInfo::dump() const {
llvm::raw_ostream &Out = llvm::errs();
// FIXME: Dump locations.
Out << "MacroInfo " << this;
if (IsBuiltinMacro) Out << " builtin";
if (IsDisabled) Out << " disabled";
if (IsUsed) Out << " used";
if (IsAllowRedefinitionsWithoutWarning)
Out << " allow_redefinitions_without_warning";
if (IsWarnIfUnused) Out << " warn_if_unused";
if (UsedForHeaderGuard) Out << " header_guard";
Out << "\n #define <macro>";
if (IsFunctionLike) {
Out << "(";
for (unsigned I = 0; I != NumParameters; ++I) {
if (I) Out << ", ";
Out << ParameterList[I]->getName();
}
if (IsC99Varargs || IsGNUVarargs) {
if (NumParameters && IsC99Varargs) Out << ", ";
Out << "...";
}
Out << ")";
}
bool First = true;
[Preprocessor] Reduce the memory overhead of `#define` directives (Recommit) Recently we observed high memory pressure caused by clang during some parallel builds. We discovered that we have several projects that have a large number of #define directives in their TUs (on the order of millions), which caused huge memory consumption in clang due to a lot of allocations for MacroInfo. We would like to reduce the memory overhead of clang for a single #define to reduce the memory overhead for these files, to allow us to reduce the memory pressure on the system during highly parallel builds. This change achieves that by removing the SmallVector in MacroInfo and instead storing the tokens in an array allocated using the bump pointer allocator, after all tokens are lexed. The added unit test with 1000000 #define directives illustrates the problem. Prior to this change, on arm64 macOS, clang's PP bump pointer allocator allocated 272007616 bytes, and used roughly 272 bytes per #define. After this change, clang's PP bump pointer allocator allocates 120002016 bytes, and uses only roughly 120 bytes per #define. For an example test file that we have internally with 7.8 million #define directives, this change produces the following improvement on arm64 macOS: Persistent allocation footprint for this test case file as it's being compiled to LLVM IR went down 22% from 5.28 GB to 4.07 GB and the total allocations went down 14% from 8.26 GB to 7.05 GB. Furthermore, this change reduced the total number of allocations made by the system for this clang invocation from 1454853 to 133663, an order of magnitude improvement. The recommit fixes the LLDB build failure. Differential Revision: https://reviews.llvm.org/D117348
2022-02-12 05:50:30 +08:00
for (const Token &Tok : tokens()) {
// Leading space is semantically meaningful in a macro definition,
// so preserve it in the dump output.
if (First || Tok.hasLeadingSpace())
Out << " ";
First = false;
if (const char *Punc = tok::getPunctuatorSpelling(Tok.getKind()))
Out << Punc;
else if (Tok.isLiteral() && Tok.getLiteralData())
Out << StringRef(Tok.getLiteralData(), Tok.getLength());
else if (auto *II = Tok.getIdentifierInfo())
Out << II->getName();
else
Out << Tok.getName();
}
}
/// Walk this directive and its predecessors (via getPrevious()) until a
/// DefMacroDirective is found, and describe it as a DefInfo.
///
/// While walking, the location of each #undef directive encountered is
/// recorded (a later one in the walk overwrites an earlier one), and the
/// first visibility directive encountered decides the public flag. If no
/// visibility directive was seen, the definition is reported as public.
/// When the chain ends without a definition, the DefInfo carries a null
/// directive.
MacroDirective::DefInfo MacroDirective::getDefinition() {
  SourceLocation UndefLoc;
  Optional<bool> isPublic;

  // Public unless a visibility directive said otherwise.
  auto Visible = [&isPublic] { return !isPublic || *isPublic; };

  for (MacroDirective *Cur = this; Cur; Cur = Cur->getPrevious()) {
    if (auto *Def = dyn_cast<DefMacroDirective>(Cur))
      return DefInfo(Def, UndefLoc, Visible());

    if (auto *Undef = dyn_cast<UndefMacroDirective>(Cur)) {
      UndefLoc = Undef->getLocation();
      continue;
    }

    // Anything that is neither a define nor an undef must be a visibility
    // directive; only the first one seen is recorded.
    auto *Vis = cast<VisibilityMacroDirective>(Cur);
    if (!isPublic)
      isPublic = Vis->isPublic();
  }

  return DefInfo(nullptr, UndefLoc, Visible());
}
/// Find the definition in this directive's history that applies at \p L.
///
/// Starting from getDefinition() and moving through getPreviousDefinition(),
/// the first definition whose location is invalid or precedes \p L in the
/// translation unit is selected. It is returned if it is not undefined, or
/// if \p L precedes its #undef location; otherwise an empty DefInfo is
/// returned. An empty DefInfo is also returned when no definition qualifies.
///
/// \param L a valid source location (asserted).
/// \param SM source manager used for translation-unit ordering queries.
const MacroDirective::DefInfo
MacroDirective::findDirectiveAtLoc(SourceLocation L,
                                   const SourceManager &SM) const {
  assert(L.isValid() && "SourceLocation is invalid.");

  DefInfo Candidate = getDefinition();
  while (Candidate) {
    // An invalid location is used for macros defined on the command line.
    const bool DefinedBeforeL =
        Candidate.getLocation().isInvalid() ||
        SM.isBeforeInTranslationUnit(Candidate.getLocation(), L);

    if (DefinedBeforeL) {
      if (!Candidate.isUndefined())
        return Candidate;
      if (SM.isBeforeInTranslationUnit(L, Candidate.getUndefLocation()))
        return Candidate;
      return DefInfo();
    }

    Candidate = Candidate.getPreviousDefinition();
  }

  return DefInfo();
}
/// Write a one-line description of this directive to llvm::errs(): its kind,
/// address, previous directive (if any), PCH origin and visibility, followed
/// by the dump of the attached MacroInfo for define directives that have one.
LLVM_DUMP_METHOD void MacroDirective::dump() const {
  llvm::raw_ostream &OS = llvm::errs();

  switch (getKind()) {
  case MD_Define:
    OS << "DefMacroDirective";
    break;
  case MD_Undefine:
    OS << "UndefMacroDirective";
    break;
  case MD_Visibility:
    OS << "VisibilityMacroDirective";
    break;
  }
  OS << " " << this;

  // FIXME: Dump SourceLocation.
  if (auto *Older = getPrevious())
    OS << " prev " << Older;

  if (IsFromPCH)
    OS << " from_pch";

  if (isa<VisibilityMacroDirective>(this)) {
    if (IsPublic)
      OS << " public";
    else
      OS << " private";
  }

  // Only define directives carry a MacroInfo worth dumping.
  const auto *DefMD = dyn_cast<DefMacroDirective>(this);
  if (const auto *Info = DefMD ? DefMD->getInfo() : nullptr) {
    OS << "\n ";
    Info->dump();
  }

  OS << "\n";
}
/// Allocate and construct a ModuleMacro from the preprocessor's allocator.
///
/// The allocation is sized as sizeof(ModuleMacro) plus one ModuleMacro
/// pointer per entry in \p Overrides, aligned for ModuleMacro, and the object
/// is constructed in place with the given arguments.
ModuleMacro *ModuleMacro::create(Preprocessor &PP, Module *OwningModule,
                                 IdentifierInfo *II, MacroInfo *Macro,
                                 ArrayRef<ModuleMacro *> Overrides) {
  const auto Bytes =
      sizeof(ModuleMacro) + sizeof(ModuleMacro *) * Overrides.size();
  void *Storage =
      PP.getPreprocessorAllocator().Allocate(Bytes, alignof(ModuleMacro));
  return new (Storage) ModuleMacro(OwningModule, II, Macro, Overrides);
}