llvm-project/clang/Lex/IdentifierTable.cpp

387 lines
14 KiB
C++

//===--- Preprocess.cpp - C Language Family Preprocessor Implementation ---===//
//
// The LLVM Compiler Infrastructure
//
// This file was developed by Chris Lattner and is distributed under
// the University of Illinois Open Source License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the IdentifierInfo, IdentifierVisitor, and
// IdentifierTable interfaces.
//
//===----------------------------------------------------------------------===//
#include "clang/Lex/IdentifierTable.h"
#include "clang/Lex/MacroInfo.h"
#include "clang/Basic/LangOptions.h"
#include <iostream>
using namespace llvm;
using namespace clang;
//===----------------------------------------------------------------------===//
// IdentifierInfo Implementation
//===----------------------------------------------------------------------===//
void IdentifierInfo::Destroy() {
delete Macro;
}
//===----------------------------------------------------------------------===//
// IdentifierVisitor Implementation
//===----------------------------------------------------------------------===//
IdentifierVisitor::~IdentifierVisitor() {
}
//===----------------------------------------------------------------------===//
// Memory Allocation Support
//===----------------------------------------------------------------------===//
/// The identifier table has a very simple memory allocation pattern: it just
/// keeps allocating identifiers, then never frees them unless it frees them
/// all. As such, we use a simple bump-pointer memory allocator to make
/// allocation speedy. Shark showed that malloc was 27% of the time spent in
/// IdentifierTable::getIdentifier with malloc, and takes a 4.3% time with this.
#define USE_ALLOCATOR 1
#if USE_ALLOCATOR
namespace {
class MemRegion {
unsigned RegionSize;
MemRegion *Next;
char *NextPtr;
public:
void Init(unsigned size, MemRegion *next) {
RegionSize = size;
Next = next;
NextPtr = (char*)(this+1);
// FIXME: uses GCC extension.
unsigned Alignment = __alignof__(IdentifierInfo);
NextPtr = (char*)((intptr_t)(NextPtr+Alignment-1) &
~(intptr_t)(Alignment-1));
}
const MemRegion *getNext() const { return Next; }
unsigned getNumBytesAllocated() const {
return NextPtr-(const char*)this;
}
/// Allocate - Allocate and return at least the specified number of bytes.
///
void *Allocate(unsigned AllocSize, MemRegion **RegPtr) {
// FIXME: uses GCC extension.
unsigned Alignment = __alignof__(IdentifierInfo);
// Round size up to an even multiple of the alignment.
AllocSize = (AllocSize+Alignment-1) & ~(Alignment-1);
// If there is space in this region for the identifier, return it.
if (unsigned(NextPtr+AllocSize-(char*)this) <= RegionSize) {
void *Result = NextPtr;
NextPtr += AllocSize;
return Result;
}
// Otherwise, we have to allocate a new chunk. Create one twice as big as
// this one.
MemRegion *NewRegion = (MemRegion *)malloc(RegionSize*2);
NewRegion->Init(RegionSize*2, this);
// Update the current "first region" pointer to point to the new region.
*RegPtr = NewRegion;
// Try allocating from it now.
return NewRegion->Allocate(AllocSize, RegPtr);
}
/// Deallocate - Release all memory for this region to the system.
///
void Deallocate() {
MemRegion *next = Next;
free(this);
if (next)
next->Deallocate();
}
};
}
#endif
//===----------------------------------------------------------------------===//
// IdentifierTable Implementation
//===----------------------------------------------------------------------===//
/// IdentifierLink - There is one of these allocated by IdentifierInfo.
/// These form the linked list of buckets for the hash table.
struct IdentifierBucket {
/// Next - This is the next bucket in the linked list.
IdentifierBucket *Next;
IdentifierInfo TokInfo;
// NOTE: TokInfo must be the last element in this structure, as the string
// information for the identifier is allocated right after it.
};
// FIXME: start hashtablesize off at 8K entries, GROW when density gets to 3.
/// HASH_TABLE_SIZE - The current size of the hash table. Note that this must
/// always be a power of two!
static unsigned HASH_TABLE_SIZE = 8096*4;
IdentifierTable::IdentifierTable(const LangOptions &LangOpts) {
IdentifierBucket **TableArray = new IdentifierBucket*[HASH_TABLE_SIZE]();
TheTable = TableArray;
NumIdentifiers = 0;
#if USE_ALLOCATOR
TheMemory = malloc(8*4096);
((MemRegion*)TheMemory)->Init(8*4096, 0);
#endif
memset(TheTable, 0, HASH_TABLE_SIZE*sizeof(IdentifierBucket*));
AddKeywords(LangOpts);
}
IdentifierTable::~IdentifierTable() {
IdentifierBucket **TableArray = (IdentifierBucket**)TheTable;
for (unsigned i = 0, e = HASH_TABLE_SIZE; i != e; ++i) {
IdentifierBucket *Id = TableArray[i];
while (Id) {
// Free memory referenced by the identifier (e.g. macro info).
Id->TokInfo.Destroy();
IdentifierBucket *Next = Id->Next;
#if !USE_ALLOCATOR
free(Id);
#endif
Id = Next;
}
}
#if USE_ALLOCATOR
((MemRegion*)TheMemory)->Deallocate();
#endif
delete [] TableArray;
}
/// HashString - Compute a hash code for the specified string.
///
static unsigned HashString(const char *Start, const char *End) {
unsigned int Result = 0;
// Perl hash function.
while (Start != End)
Result = Result * 33 + *Start++;
Result = Result + (Result >> 5);
return Result;
}
IdentifierInfo &IdentifierTable::get(const char *NameStart,
const char *NameEnd) {
IdentifierBucket **TableArray = (IdentifierBucket**)TheTable;
unsigned Hash = HashString(NameStart, NameEnd) & (HASH_TABLE_SIZE-1);
unsigned Length = NameEnd-NameStart;
IdentifierBucket *IdentHead = TableArray[Hash];
for (IdentifierBucket *Identifier = IdentHead, *LastID = 0; Identifier;
LastID = Identifier, Identifier = Identifier->Next) {
if (Identifier->TokInfo.getNameLength() == Length &&
memcmp(Identifier->TokInfo.getName(), NameStart, Length) == 0) {
// If found identifier wasn't at start of bucket, move it there so
// that frequently searched for identifiers are found earlier, even if
// they first occur late in the source file.
if (LastID) {
LastID->Next = Identifier->Next;
Identifier->Next = IdentHead;
TableArray[Hash] = Identifier;
}
return Identifier->TokInfo;
}
}
// Allocate a new identifier, with space for the null-terminated string at the
// end.
unsigned AllocSize = sizeof(IdentifierBucket)+Length+1;
#if USE_ALLOCATOR
IdentifierBucket *Identifier = (IdentifierBucket*)
((MemRegion*)TheMemory)->Allocate(AllocSize, (MemRegion**)&TheMemory);
#else
IdentifierBucket *Identifier = (IdentifierBucket*)malloc(AllocSize);
#endif
Identifier->TokInfo.NameLen = Length;
Identifier->TokInfo.Macro = 0;
Identifier->TokInfo.TokenID = tok::identifier;
Identifier->TokInfo.PPID = tok::pp_not_keyword;
Identifier->TokInfo.ObjCID = tok::objc_not_keyword;
Identifier->TokInfo.IsExtension = false;
Identifier->TokInfo.IsPoisoned = false;
Identifier->TokInfo.IsOtherTargetMacro = false;
Identifier->TokInfo.FETokenInfo = 0;
// Copy the string information.
char *StrBuffer = (char*)(Identifier+1);
memcpy(StrBuffer, NameStart, Length);
StrBuffer[Length] = 0; // Null terminate string.
// Link it into the hash table. Adding it to the start of the hash table is
// useful for buckets with lots of entries. This means that more recently
// referenced identifiers will be near the head of the bucket.
Identifier->Next = IdentHead;
TableArray[Hash] = Identifier;
return Identifier->TokInfo;
}
IdentifierInfo &IdentifierTable::get(const std::string &Name) {
// Don't use c_str() here: no need to be null terminated.
const char *NameBytes = &Name[0];
unsigned Size = Name.size();
return get(NameBytes, NameBytes+Size);
}
/// VisitIdentifiers - This method walks through all of the identifiers,
/// invoking IV->VisitIdentifier for each of them.
void IdentifierTable::VisitIdentifiers(const IdentifierVisitor &IV) {
IdentifierBucket **TableArray = (IdentifierBucket**)TheTable;
for (unsigned i = 0, e = HASH_TABLE_SIZE; i != e; ++i) {
for (IdentifierBucket *Id = TableArray[i]; Id; Id = Id->Next)
IV.VisitIdentifier(Id->TokInfo);
}
}
//===----------------------------------------------------------------------===//
// Language Keyword Implementation
//===----------------------------------------------------------------------===//
/// AddKeyword - This method is used to associate a token ID with specific
/// identifiers because they are language keywords. This causes the lexer to
/// automatically map matching identifiers to specialized token codes.
///
/// The C90/C99/CPP flags are set to 0 if the token should be enabled in the
/// specified langauge, set to 1 if it is an extension in the specified
/// language, and set to 2 if disabled in the specified language.
static void AddKeyword(const std::string &Keyword, tok::TokenKind TokenCode,
int C90, int C99, int CXX,
const LangOptions &LangOpts, IdentifierTable &Table) {
int Flags = LangOpts.CPlusPlus ? CXX : (LangOpts.C99 ? C99 : C90);
// Don't add this keyword if disabled in this language or if an extension
// and extensions are disabled.
if (Flags + LangOpts.NoExtensions >= 2) return;
const char *Str = &Keyword[0];
IdentifierInfo &Info = Table.get(Str, Str+Keyword.size());
Info.setTokenID(TokenCode);
Info.setIsExtensionToken(Flags == 1);
}
/// AddPPKeyword - Register a preprocessor keyword like "define" "undef" or
/// "elif".
static void AddPPKeyword(tok::PPKeywordKind PPID,
const char *Name, unsigned NameLen,
IdentifierTable &Table) {
Table.get(Name, Name+NameLen).setPPKeywordID(PPID);
}
/// AddObjCKeyword - Register an Objective-C @keyword like "class" "selector" or
/// "property".
static void AddObjCKeyword(tok::ObjCKeywordKind ObjCID,
const char *Name, unsigned NameLen,
IdentifierTable &Table) {
Table.get(Name, Name+NameLen).setObjCKeywordID(ObjCID);
}
/// AddKeywords - Add all keywords to the symbol table.
///
void IdentifierTable::AddKeywords(const LangOptions &LangOpts) {
enum {
C90Shift = 0,
EXTC90 = 1 << C90Shift,
NOTC90 = 2 << C90Shift,
C99Shift = 2,
EXTC99 = 1 << C99Shift,
NOTC99 = 2 << C99Shift,
CPPShift = 4,
EXTCPP = 1 << CPPShift,
NOTCPP = 2 << CPPShift,
Mask = 3
};
// Add keywords and tokens for the current language.
#define KEYWORD(NAME, FLAGS) \
AddKeyword(#NAME, tok::kw_ ## NAME, \
((FLAGS) >> C90Shift) & Mask, \
((FLAGS) >> C99Shift) & Mask, \
((FLAGS) >> CPPShift) & Mask, LangOpts, *this);
#define ALIAS(NAME, TOK) \
AddKeyword(NAME, tok::kw_ ## TOK, 0, 0, 0, LangOpts, *this);
#define PPKEYWORD(NAME) \
AddPPKeyword(tok::pp_##NAME, #NAME, strlen(#NAME), *this);
#define OBJC1_AT_KEYWORD(NAME) \
if (LangOpts.ObjC1) \
AddObjCKeyword(tok::objc_##NAME, #NAME, strlen(#NAME), *this);
#define OBJC2_AT_KEYWORD(NAME) \
if (LangOpts.ObjC2) \
AddObjCKeyword(tok::objc_##NAME, #NAME, strlen(#NAME), *this);
#include "clang/Basic/TokenKinds.def"
}
//===----------------------------------------------------------------------===//
// Stats Implementation
//===----------------------------------------------------------------------===//
/// PrintStats - Print statistics about how well the identifier table is doing
/// at hashing identifiers.
void IdentifierTable::PrintStats() const {
unsigned NumIdentifiers = 0;
unsigned NumEmptyBuckets = 0;
unsigned MaxBucketLength = 0;
unsigned AverageIdentifierSize = 0;
unsigned MaxIdentifierLength = 0;
IdentifierBucket **TableArray = (IdentifierBucket**)TheTable;
for (unsigned i = 0, e = HASH_TABLE_SIZE; i != e; ++i) {
unsigned NumIdentifiersInBucket = 0;
for (IdentifierBucket *Id = TableArray[i]; Id; Id = Id->Next) {
AverageIdentifierSize += Id->TokInfo.getNameLength();
if (MaxIdentifierLength < Id->TokInfo.getNameLength())
MaxIdentifierLength = Id->TokInfo.getNameLength();
++NumIdentifiersInBucket;
}
if (NumIdentifiersInBucket > MaxBucketLength)
MaxBucketLength = NumIdentifiersInBucket;
if (NumIdentifiersInBucket == 0)
++NumEmptyBuckets;
NumIdentifiers += NumIdentifiersInBucket;
}
std::cerr << "\n*** Identifier Table Stats:\n";
std::cerr << "# Identifiers: " << NumIdentifiers << "\n";
std::cerr << "# Empty Buckets: " << NumEmptyBuckets << "\n";
std::cerr << "Max identifiers in one bucket: " << MaxBucketLength << "\n";
std::cerr << "Hash density (#identifiers per bucket): "
<< NumIdentifiers/(double)HASH_TABLE_SIZE << "\n";
std::cerr << "Nonempty hash density (average chain length): "
<< NumIdentifiers/(double)(HASH_TABLE_SIZE-NumEmptyBuckets) << "\n";
std::cerr << "Ave identifier length: "
<< (AverageIdentifierSize/(double)NumIdentifiers) << "\n";
std::cerr << "Max identifier length: " << MaxIdentifierLength << "\n";
// Compute statistics about the memory allocated for identifiers.
#if USE_ALLOCATOR
unsigned BytesUsed = 0;
unsigned NumRegions = 0;
const MemRegion *R = (MemRegion*)TheMemory;
for (; R; R = R->getNext(), ++NumRegions) {
BytesUsed += R->getNumBytesAllocated();
}
std::cerr << "\nNumber of memory regions: " << NumRegions << "\n";
std::cerr << "Bytes allocated for identifiers: " << BytesUsed << "\n";
#endif
}