forked from OSchip/llvm-project
[modules] Don't save uninteresting identifiers, and don't consider identifiers
to be interesting just because they are the name of a builtin. Reduces the size of an empty module by over 80% (~100KB). llvm-svn: 242650
This commit is contained in:
parent
ea4ad5a416
commit
9c25418424
|
@ -161,7 +161,7 @@ public:
|
|||
/// TokenID is normally read-only but there are 2 instances where we revert it
|
||||
/// to tok::identifier for libstdc++ 4.2. Keep track of when this happens
|
||||
/// using this method so we can inform serialization about it.
|
||||
void RevertTokenIDToIdentifier() {
|
||||
void revertTokenIDToIdentifier() {
|
||||
assert(TokenID != tok::identifier && "Already at tok::identifier");
|
||||
TokenID = tok::identifier;
|
||||
RevertedTokenID = true;
|
||||
|
@ -183,6 +183,18 @@ public:
|
|||
}
|
||||
void setObjCKeywordID(tok::ObjCKeywordKind ID) { ObjCOrBuiltinID = ID; }
|
||||
|
||||
/// \brief True if revertBuiltin() was called.
|
||||
bool hasRevertedBuiltin() const {
|
||||
return ObjCOrBuiltinID == tok::NUM_OBJC_KEYWORDS;
|
||||
}
|
||||
|
||||
/// \brief Revert the identifier to a non-builtin identifier. We do this if
|
||||
/// the name of a known builtin library function is used to declare that
|
||||
/// function, but an unexpected type is specified.
|
||||
void revertBuiltin() {
|
||||
setBuiltinID(0);
|
||||
}
|
||||
|
||||
/// \brief Return a value indicating whether this is a builtin function.
|
||||
///
|
||||
/// 0 is not-built-in. 1 is builtin-for-some-nonprimary-target.
|
||||
|
|
|
@ -476,6 +476,11 @@ public:
|
|||
/// any point during translation.
|
||||
bool isDirectlyImported() const { return DirectlyImported; }
|
||||
|
||||
/// \brief Is this a module file for a module (rather than a PCH or similar).
|
||||
bool isModule() const {
|
||||
return Kind == MK_ImplicitModule || Kind == MK_ExplicitModule;
|
||||
}
|
||||
|
||||
/// \brief Dump debugging output for this module.
|
||||
void dump();
|
||||
};
|
||||
|
|
|
@ -1489,7 +1489,7 @@ bool Parser::TryKeywordIdentFallback(bool DisableKeyword) {
|
|||
<< PP.getSpelling(Tok)
|
||||
<< DisableKeyword;
|
||||
if (DisableKeyword)
|
||||
Tok.getIdentifierInfo()->RevertTokenIDToIdentifier();
|
||||
Tok.getIdentifierInfo()->revertTokenIDToIdentifier();
|
||||
Tok.setKind(tok::identifier);
|
||||
return true;
|
||||
}
|
||||
|
|
|
@ -3115,7 +3115,7 @@ bool Sema::MergeFunctionDecl(FunctionDecl *New, NamedDecl *&OldD,
|
|||
// remain visible, a single bogus local redeclaration (which is
|
||||
// actually only a warning) could break all the downstream code.
|
||||
if (!New->getLexicalDeclContext()->isFunctionOrMethod())
|
||||
New->getIdentifier()->setBuiltinID(Builtin::NotBuiltin);
|
||||
New->getIdentifier()->revertBuiltin();
|
||||
|
||||
return false;
|
||||
}
|
||||
|
|
|
@ -735,10 +735,10 @@ ASTIdentifierLookupTraitBase::ReadKey(const unsigned char* d, unsigned n) {
|
|||
}
|
||||
|
||||
/// \brief Whether the given identifier is "interesting".
|
||||
static bool isInterestingIdentifier(IdentifierInfo &II) {
|
||||
static bool isInterestingIdentifier(IdentifierInfo &II, bool IsModule) {
|
||||
return II.hadMacroDefinition() ||
|
||||
II.isPoisoned() ||
|
||||
II.getObjCOrBuiltinID() ||
|
||||
(IsModule ? II.hasRevertedBuiltin() : II.getObjCOrBuiltinID()) ||
|
||||
II.hasRevertedTokenIDToIdentifier() ||
|
||||
II.getFETokenInfo<void>();
|
||||
}
|
||||
|
@ -767,7 +767,7 @@ IdentifierInfo *ASTIdentifierLookupTrait::ReadData(const internal_key_type& k,
|
|||
}
|
||||
if (!II->isFromAST()) {
|
||||
II->setIsFromAST();
|
||||
if (isInterestingIdentifier(*II))
|
||||
if (isInterestingIdentifier(*II, F.isModule()))
|
||||
II->setChangedSinceDeserialization();
|
||||
}
|
||||
Reader.markIdentifierUpToDate(II);
|
||||
|
@ -784,6 +784,7 @@ IdentifierInfo *ASTIdentifierLookupTrait::ReadData(const internal_key_type& k,
|
|||
unsigned Bits = endian::readNext<uint16_t, little, unaligned>(d);
|
||||
bool CPlusPlusOperatorKeyword = readBit(Bits);
|
||||
bool HasRevertedTokenIDToIdentifier = readBit(Bits);
|
||||
bool HasRevertedBuiltin = readBit(Bits);
|
||||
bool Poisoned = readBit(Bits);
|
||||
bool ExtensionToken = readBit(Bits);
|
||||
bool HadMacroDefinition = readBit(Bits);
|
||||
|
@ -794,8 +795,15 @@ IdentifierInfo *ASTIdentifierLookupTrait::ReadData(const internal_key_type& k,
|
|||
// Set or check the various bits in the IdentifierInfo structure.
|
||||
// Token IDs are read-only.
|
||||
if (HasRevertedTokenIDToIdentifier && II->getTokenID() != tok::identifier)
|
||||
II->RevertTokenIDToIdentifier();
|
||||
II->revertTokenIDToIdentifier();
|
||||
if (!F.isModule())
|
||||
II->setObjCOrBuiltinID(ObjCOrBuiltinID);
|
||||
else if (HasRevertedBuiltin && II->getBuiltinID()) {
|
||||
II->revertBuiltin();
|
||||
assert((II->hasRevertedBuiltin() ||
|
||||
II->getObjCOrBuiltinID() == ObjCOrBuiltinID) &&
|
||||
"Incorrect ObjC keyword or builtin ID");
|
||||
}
|
||||
assert(II->isExtensionToken() == ExtensionToken &&
|
||||
"Incorrect extension token flag");
|
||||
(void)ExtensionToken;
|
||||
|
|
|
@ -3102,15 +3102,16 @@ class ASTIdentifierTableTrait {
|
|||
ASTWriter &Writer;
|
||||
Preprocessor &PP;
|
||||
IdentifierResolver &IdResolver;
|
||||
bool IsModule;
|
||||
|
||||
/// \brief Determines whether this is an "interesting" identifier that needs a
|
||||
/// full IdentifierInfo structure written into the hash table. Notably, this
|
||||
/// doesn't check whether the name has macros defined; use PublicMacroIterator
|
||||
/// to check that.
|
||||
bool isInterestingIdentifier(IdentifierInfo *II, uint64_t MacroOffset) {
|
||||
bool isInterestingIdentifier(const IdentifierInfo *II, uint64_t MacroOffset) {
|
||||
if (MacroOffset ||
|
||||
II->isPoisoned() ||
|
||||
II->getObjCOrBuiltinID() ||
|
||||
(IsModule ? II->hasRevertedBuiltin() : II->getObjCOrBuiltinID()) ||
|
||||
II->hasRevertedTokenIDToIdentifier() ||
|
||||
II->getFETokenInfo<void>())
|
||||
return true;
|
||||
|
@ -3129,13 +3130,17 @@ public:
|
|||
typedef unsigned offset_type;
|
||||
|
||||
ASTIdentifierTableTrait(ASTWriter &Writer, Preprocessor &PP,
|
||||
IdentifierResolver &IdResolver)
|
||||
: Writer(Writer), PP(PP), IdResolver(IdResolver) {}
|
||||
IdentifierResolver &IdResolver, bool IsModule)
|
||||
: Writer(Writer), PP(PP), IdResolver(IdResolver), IsModule(IsModule) {}
|
||||
|
||||
static hash_value_type ComputeHash(const IdentifierInfo* II) {
|
||||
return llvm::HashString(II->getName());
|
||||
}
|
||||
|
||||
bool isInterestingNonMacroIdentifier(const IdentifierInfo *II) {
|
||||
return isInterestingIdentifier(II, 0);
|
||||
}
|
||||
|
||||
std::pair<unsigned,unsigned>
|
||||
EmitKeyDataLength(raw_ostream& Out, IdentifierInfo* II, IdentID ID) {
|
||||
unsigned KeyLen = II->getLength() + 1;
|
||||
|
@ -3192,6 +3197,7 @@ public:
|
|||
Bits = (Bits << 1) | unsigned(HadMacroDefinition);
|
||||
Bits = (Bits << 1) | unsigned(II->isExtensionToken());
|
||||
Bits = (Bits << 1) | unsigned(II->isPoisoned());
|
||||
Bits = (Bits << 1) | unsigned(II->hasRevertedBuiltin());
|
||||
Bits = (Bits << 1) | unsigned(II->hasRevertedTokenIDToIdentifier());
|
||||
Bits = (Bits << 1) | unsigned(II->isCPlusPlusOperatorKeyword());
|
||||
LE.write<uint16_t>(Bits);
|
||||
|
@ -3229,7 +3235,7 @@ void ASTWriter::WriteIdentifierTable(Preprocessor &PP,
|
|||
// strings.
|
||||
{
|
||||
llvm::OnDiskChainedHashTableGenerator<ASTIdentifierTableTrait> Generator;
|
||||
ASTIdentifierTableTrait Trait(*this, PP, IdResolver);
|
||||
ASTIdentifierTableTrait Trait(*this, PP, IdResolver, IsModule);
|
||||
|
||||
// Look for any identifiers that were named while processing the
|
||||
// headers, but are otherwise not needed. We add these to the hash
|
||||
|
@ -3245,6 +3251,7 @@ void ASTWriter::WriteIdentifierTable(Preprocessor &PP,
|
|||
// that their order is stable.
|
||||
std::sort(IIs.begin(), IIs.end(), llvm::less_ptr<IdentifierInfo>());
|
||||
for (const IdentifierInfo *II : IIs)
|
||||
if (Trait.isInterestingNonMacroIdentifier(II))
|
||||
getIdentifierRef(II);
|
||||
|
||||
// Create the on-disk hash table representation. We only store offsets
|
||||
|
@ -4444,6 +4451,7 @@ void ASTWriter::WriteASTCore(Sema &SemaRef,
|
|||
WriteHeaderSearch(PP.getHeaderSearchInfo());
|
||||
WriteSelectors(SemaRef);
|
||||
WriteReferencedSelectorsPool(SemaRef);
|
||||
WriteLateParsedTemplates(SemaRef);
|
||||
WriteIdentifierTable(PP, SemaRef.IdResolver, isModule);
|
||||
WriteFPPragmaOptions(SemaRef.getFPOptions());
|
||||
WriteOpenCLExtensions(SemaRef);
|
||||
|
@ -4559,7 +4567,6 @@ void ASTWriter::WriteASTCore(Sema &SemaRef,
|
|||
WriteDeclReplacementsBlock();
|
||||
WriteRedeclarations();
|
||||
WriteObjCCategories();
|
||||
WriteLateParsedTemplates(SemaRef);
|
||||
if(!WritingModule)
|
||||
WriteOptimizePragmaOptions(SemaRef);
|
||||
|
||||
|
|
|
@ -10,6 +10,12 @@
|
|||
// RUN: -emit-module -fmodule-name=empty -o %t/check.pcm \
|
||||
// RUN: %s
|
||||
//
|
||||
// The module file should be identical each time we produce it.
|
||||
// RUN: diff %t/base.pcm %t/check.pcm
|
||||
//
|
||||
// We expect an empty module to be less than 30KB.
|
||||
// REQUIRES: shell
|
||||
// RUN: wc -c %t/base.pcm | FileCheck --check-prefix=CHECK-SIZE %s
|
||||
// CHECK-SIZE: {{^[12][0-9]{4} }}
|
||||
|
||||
module empty { header "Inputs/empty.h" export * }
|
||||
|
|
Loading…
Reference in New Issue