[modules] Don't save uninteresting identifiers, and don't consider identifiers

to be interesting just because they are the name of a builtin. Reduces the size
of an empty module by over 80% (~100KB).

llvm-svn: 242650
This commit is contained in:
Richard Smith 2015-07-19 21:41:12 +00:00
parent ea4ad5a416
commit 9c25418424
7 changed files with 53 additions and 15 deletions

View File

@ -161,7 +161,7 @@ public:
/// TokenID is normally read-only but there are 2 instances where we revert it /// TokenID is normally read-only but there are 2 instances where we revert it
/// to tok::identifier for libstdc++ 4.2. Keep track of when this happens /// to tok::identifier for libstdc++ 4.2. Keep track of when this happens
/// using this method so we can inform serialization about it. /// using this method so we can inform serialization about it.
void RevertTokenIDToIdentifier() { void revertTokenIDToIdentifier() {
assert(TokenID != tok::identifier && "Already at tok::identifier"); assert(TokenID != tok::identifier && "Already at tok::identifier");
TokenID = tok::identifier; TokenID = tok::identifier;
RevertedTokenID = true; RevertedTokenID = true;
@ -183,6 +183,18 @@ public:
} }
void setObjCKeywordID(tok::ObjCKeywordKind ID) { ObjCOrBuiltinID = ID; } void setObjCKeywordID(tok::ObjCKeywordKind ID) { ObjCOrBuiltinID = ID; }
/// \brief True if revertBuiltin() was called.
///
/// Checks whether ObjCOrBuiltinID is exactly tok::NUM_OBJC_KEYWORDS.
bool hasRevertedBuiltin() const {
return ObjCOrBuiltinID == tok::NUM_OBJC_KEYWORDS;
}
/// \brief Revert the identifier to a non-builtin identifier. We do this if
/// the name of a known builtin library function is used to declare that
/// function, but an unexpected type is specified.
///
/// The revert is performed by resetting the builtin ID to 0.
void revertBuiltin() {
setBuiltinID(0);
}
/// \brief Return a value indicating whether this is a builtin function. /// \brief Return a value indicating whether this is a builtin function.
/// ///
/// 0 is not-built-in. 1 is builtin-for-some-nonprimary-target. /// 0 is not-built-in. 1 is builtin-for-some-nonprimary-target.

View File

@ -476,6 +476,11 @@ public:
/// any point during translation. /// any point during translation.
bool isDirectlyImported() const { return DirectlyImported; } bool isDirectlyImported() const { return DirectlyImported; }
/// \brief Whether this file was built for a module (rather than a PCH or
/// similar), i.e. its Kind is MK_ImplicitModule or MK_ExplicitModule.
bool isModule() const {
return Kind == MK_ImplicitModule || Kind == MK_ExplicitModule;
}
/// \brief Dump debugging output for this module. /// \brief Dump debugging output for this module.
void dump(); void dump();
}; };

View File

@ -1489,7 +1489,7 @@ bool Parser::TryKeywordIdentFallback(bool DisableKeyword) {
<< PP.getSpelling(Tok) << PP.getSpelling(Tok)
<< DisableKeyword; << DisableKeyword;
if (DisableKeyword) if (DisableKeyword)
Tok.getIdentifierInfo()->RevertTokenIDToIdentifier(); Tok.getIdentifierInfo()->revertTokenIDToIdentifier();
Tok.setKind(tok::identifier); Tok.setKind(tok::identifier);
return true; return true;
} }

View File

@ -3115,7 +3115,7 @@ bool Sema::MergeFunctionDecl(FunctionDecl *New, NamedDecl *&OldD,
// remain visible, a single bogus local redeclaration (which is // remain visible, a single bogus local redeclaration (which is
// actually only a warning) could break all the downstream code. // actually only a warning) could break all the downstream code.
if (!New->getLexicalDeclContext()->isFunctionOrMethod()) if (!New->getLexicalDeclContext()->isFunctionOrMethod())
New->getIdentifier()->setBuiltinID(Builtin::NotBuiltin); New->getIdentifier()->revertBuiltin();
return false; return false;
} }

View File

@ -735,10 +735,10 @@ ASTIdentifierLookupTraitBase::ReadKey(const unsigned char* d, unsigned n) {
} }
/// \brief Whether the given identifier is "interesting". /// \brief Whether the given identifier is "interesting".
static bool isInterestingIdentifier(IdentifierInfo &II) { static bool isInterestingIdentifier(IdentifierInfo &II, bool IsModule) {
return II.hadMacroDefinition() || return II.hadMacroDefinition() ||
II.isPoisoned() || II.isPoisoned() ||
II.getObjCOrBuiltinID() || (IsModule ? II.hasRevertedBuiltin() : II.getObjCOrBuiltinID()) ||
II.hasRevertedTokenIDToIdentifier() || II.hasRevertedTokenIDToIdentifier() ||
II.getFETokenInfo<void>(); II.getFETokenInfo<void>();
} }
@ -767,7 +767,7 @@ IdentifierInfo *ASTIdentifierLookupTrait::ReadData(const internal_key_type& k,
} }
if (!II->isFromAST()) { if (!II->isFromAST()) {
II->setIsFromAST(); II->setIsFromAST();
if (isInterestingIdentifier(*II)) if (isInterestingIdentifier(*II, F.isModule()))
II->setChangedSinceDeserialization(); II->setChangedSinceDeserialization();
} }
Reader.markIdentifierUpToDate(II); Reader.markIdentifierUpToDate(II);
@ -784,6 +784,7 @@ IdentifierInfo *ASTIdentifierLookupTrait::ReadData(const internal_key_type& k,
unsigned Bits = endian::readNext<uint16_t, little, unaligned>(d); unsigned Bits = endian::readNext<uint16_t, little, unaligned>(d);
bool CPlusPlusOperatorKeyword = readBit(Bits); bool CPlusPlusOperatorKeyword = readBit(Bits);
bool HasRevertedTokenIDToIdentifier = readBit(Bits); bool HasRevertedTokenIDToIdentifier = readBit(Bits);
bool HasRevertedBuiltin = readBit(Bits);
bool Poisoned = readBit(Bits); bool Poisoned = readBit(Bits);
bool ExtensionToken = readBit(Bits); bool ExtensionToken = readBit(Bits);
bool HadMacroDefinition = readBit(Bits); bool HadMacroDefinition = readBit(Bits);
@ -794,8 +795,15 @@ IdentifierInfo *ASTIdentifierLookupTrait::ReadData(const internal_key_type& k,
// Set or check the various bits in the IdentifierInfo structure. // Set or check the various bits in the IdentifierInfo structure.
// Token IDs are read-only. // Token IDs are read-only.
if (HasRevertedTokenIDToIdentifier && II->getTokenID() != tok::identifier) if (HasRevertedTokenIDToIdentifier && II->getTokenID() != tok::identifier)
II->RevertTokenIDToIdentifier(); II->revertTokenIDToIdentifier();
II->setObjCOrBuiltinID(ObjCOrBuiltinID); if (!F.isModule())
II->setObjCOrBuiltinID(ObjCOrBuiltinID);
else if (HasRevertedBuiltin && II->getBuiltinID()) {
II->revertBuiltin();
assert((II->hasRevertedBuiltin() ||
II->getObjCOrBuiltinID() == ObjCOrBuiltinID) &&
"Incorrect ObjC keyword or builtin ID");
}
assert(II->isExtensionToken() == ExtensionToken && assert(II->isExtensionToken() == ExtensionToken &&
"Incorrect extension token flag"); "Incorrect extension token flag");
(void)ExtensionToken; (void)ExtensionToken;

View File

@ -3102,15 +3102,16 @@ class ASTIdentifierTableTrait {
ASTWriter &Writer; ASTWriter &Writer;
Preprocessor &PP; Preprocessor &PP;
IdentifierResolver &IdResolver; IdentifierResolver &IdResolver;
bool IsModule;
/// \brief Determines whether this is an "interesting" identifier that needs a /// \brief Determines whether this is an "interesting" identifier that needs a
/// full IdentifierInfo structure written into the hash table. Notably, this /// full IdentifierInfo structure written into the hash table. Notably, this
/// doesn't check whether the name has macros defined; use PublicMacroIterator /// doesn't check whether the name has macros defined; use PublicMacroIterator
/// to check that. /// to check that.
bool isInterestingIdentifier(IdentifierInfo *II, uint64_t MacroOffset) { bool isInterestingIdentifier(const IdentifierInfo *II, uint64_t MacroOffset) {
if (MacroOffset || if (MacroOffset ||
II->isPoisoned() || II->isPoisoned() ||
II->getObjCOrBuiltinID() || (IsModule ? II->hasRevertedBuiltin() : II->getObjCOrBuiltinID()) ||
II->hasRevertedTokenIDToIdentifier() || II->hasRevertedTokenIDToIdentifier() ||
II->getFETokenInfo<void>()) II->getFETokenInfo<void>())
return true; return true;
@ -3129,13 +3130,17 @@ public:
typedef unsigned offset_type; typedef unsigned offset_type;
ASTIdentifierTableTrait(ASTWriter &Writer, Preprocessor &PP, ASTIdentifierTableTrait(ASTWriter &Writer, Preprocessor &PP,
IdentifierResolver &IdResolver) IdentifierResolver &IdResolver, bool IsModule)
: Writer(Writer), PP(PP), IdResolver(IdResolver) {} : Writer(Writer), PP(PP), IdResolver(IdResolver), IsModule(IsModule) {}
static hash_value_type ComputeHash(const IdentifierInfo* II) { static hash_value_type ComputeHash(const IdentifierInfo* II) {
return llvm::HashString(II->getName()); return llvm::HashString(II->getName());
} }
bool isInterestingNonMacroIdentifier(const IdentifierInfo *II) {
return isInterestingIdentifier(II, 0);
}
std::pair<unsigned,unsigned> std::pair<unsigned,unsigned>
EmitKeyDataLength(raw_ostream& Out, IdentifierInfo* II, IdentID ID) { EmitKeyDataLength(raw_ostream& Out, IdentifierInfo* II, IdentID ID) {
unsigned KeyLen = II->getLength() + 1; unsigned KeyLen = II->getLength() + 1;
@ -3192,6 +3197,7 @@ public:
Bits = (Bits << 1) | unsigned(HadMacroDefinition); Bits = (Bits << 1) | unsigned(HadMacroDefinition);
Bits = (Bits << 1) | unsigned(II->isExtensionToken()); Bits = (Bits << 1) | unsigned(II->isExtensionToken());
Bits = (Bits << 1) | unsigned(II->isPoisoned()); Bits = (Bits << 1) | unsigned(II->isPoisoned());
Bits = (Bits << 1) | unsigned(II->hasRevertedBuiltin());
Bits = (Bits << 1) | unsigned(II->hasRevertedTokenIDToIdentifier()); Bits = (Bits << 1) | unsigned(II->hasRevertedTokenIDToIdentifier());
Bits = (Bits << 1) | unsigned(II->isCPlusPlusOperatorKeyword()); Bits = (Bits << 1) | unsigned(II->isCPlusPlusOperatorKeyword());
LE.write<uint16_t>(Bits); LE.write<uint16_t>(Bits);
@ -3229,7 +3235,7 @@ void ASTWriter::WriteIdentifierTable(Preprocessor &PP,
// strings. // strings.
{ {
llvm::OnDiskChainedHashTableGenerator<ASTIdentifierTableTrait> Generator; llvm::OnDiskChainedHashTableGenerator<ASTIdentifierTableTrait> Generator;
ASTIdentifierTableTrait Trait(*this, PP, IdResolver); ASTIdentifierTableTrait Trait(*this, PP, IdResolver, IsModule);
// Look for any identifiers that were named while processing the // Look for any identifiers that were named while processing the
// headers, but are otherwise not needed. We add these to the hash // headers, but are otherwise not needed. We add these to the hash
@ -3245,7 +3251,8 @@ void ASTWriter::WriteIdentifierTable(Preprocessor &PP,
// that their order is stable. // that their order is stable.
std::sort(IIs.begin(), IIs.end(), llvm::less_ptr<IdentifierInfo>()); std::sort(IIs.begin(), IIs.end(), llvm::less_ptr<IdentifierInfo>());
for (const IdentifierInfo *II : IIs) for (const IdentifierInfo *II : IIs)
getIdentifierRef(II); if (Trait.isInterestingNonMacroIdentifier(II))
getIdentifierRef(II);
// Create the on-disk hash table representation. We only store offsets // Create the on-disk hash table representation. We only store offsets
// for identifiers that appear here for the first time. // for identifiers that appear here for the first time.
@ -4444,6 +4451,7 @@ void ASTWriter::WriteASTCore(Sema &SemaRef,
WriteHeaderSearch(PP.getHeaderSearchInfo()); WriteHeaderSearch(PP.getHeaderSearchInfo());
WriteSelectors(SemaRef); WriteSelectors(SemaRef);
WriteReferencedSelectorsPool(SemaRef); WriteReferencedSelectorsPool(SemaRef);
WriteLateParsedTemplates(SemaRef);
WriteIdentifierTable(PP, SemaRef.IdResolver, isModule); WriteIdentifierTable(PP, SemaRef.IdResolver, isModule);
WriteFPPragmaOptions(SemaRef.getFPOptions()); WriteFPPragmaOptions(SemaRef.getFPOptions());
WriteOpenCLExtensions(SemaRef); WriteOpenCLExtensions(SemaRef);
@ -4559,7 +4567,6 @@ void ASTWriter::WriteASTCore(Sema &SemaRef,
WriteDeclReplacementsBlock(); WriteDeclReplacementsBlock();
WriteRedeclarations(); WriteRedeclarations();
WriteObjCCategories(); WriteObjCCategories();
WriteLateParsedTemplates(SemaRef);
if(!WritingModule) if(!WritingModule)
WriteOptimizePragmaOptions(SemaRef); WriteOptimizePragmaOptions(SemaRef);

View File

@ -10,6 +10,12 @@
// RUN: -emit-module -fmodule-name=empty -o %t/check.pcm \ // RUN: -emit-module -fmodule-name=empty -o %t/check.pcm \
// RUN: %s // RUN: %s
// //
// The module file should be identical each time we produce it.
// RUN: diff %t/base.pcm %t/check.pcm // RUN: diff %t/base.pcm %t/check.pcm
//
// We expect an empty module to be less than 30KB.
// REQUIRES: shell
// RUN: wc -c %t/base.pcm | FileCheck --check-prefix=CHECK-SIZE %s
// CHECK-SIZE: {{^[12][0-9]{4} }}
module empty { header "Inputs/empty.h" export * } module empty { header "Inputs/empty.h" export * }