Pool allocate ImplicitConversionSequences.

To avoid malloc thrashing give OverloadCandidateSet an inline capacity for conversion sequences.
We use the fact that OverloadCandidates never outlive the OverloadCandidateSet and have a fixed
amount of conversion sequences.

This eliminates the oversized SmallVector from OverloadCandidate shrinking it from 752 to 208 bytes.

On the test case from the "Why is CLANG++ so freaking slow" thread on llvmdev this avoids one gig
of vector reallocation (including memcpy) which translates into 5-10% speedup on Lion/x86_64.

Overload candidate computation is still the biggest malloc contributor when compiling templated
c++ code.

llvm-svn: 148186
This commit is contained in:
Benjamin Kramer 2012-01-14 16:32:05 +00:00
parent fb761ff544
commit b009517ad4
2 changed files with 54 additions and 18 deletions

View File

@ -24,6 +24,7 @@
#include "clang/Sema/SemaFixItUtils.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/Allocator.h"
namespace clang {
class ASTContext;
@ -582,12 +583,17 @@ namespace clang {
CXXConversionDecl *Surrogate;
/// Conversions - The conversion sequences used to convert the
/// function arguments to the function parameters.
SmallVector<ImplicitConversionSequence, 4> Conversions;
/// function arguments to the function parameters, the pointer points to a
/// fixed size array with NumConversions elements. The memory is owned by
/// the OverloadCandidateSet.
ImplicitConversionSequence *Conversions;
/// The FixIt hints which can be used to fix the Bad candidate.
ConversionFixItGenerator Fix;
/// NumConversions - The number of elements in the Conversions array.
unsigned NumConversions;
/// Viable - True to indicate that this overload candidate is viable.
bool Viable;
@ -653,13 +659,17 @@ namespace clang {
StandardConversionSequence FinalConversion;
};
~OverloadCandidate() {
for (unsigned i = 0, e = NumConversions; i != e; ++i)
Conversions[i].~ImplicitConversionSequence();
}
/// hasAmbiguousConversion - Returns whether this overload
/// candidate requires an ambiguous conversion or not.
bool hasAmbiguousConversion() const {
for (SmallVectorImpl<ImplicitConversionSequence>::const_iterator
I = Conversions.begin(), E = Conversions.end(); I != E; ++I) {
if (!I->isInitialized()) return false;
if (I->isAmbiguous()) return true;
for (unsigned i = 0, e = NumConversions; i != e; ++i) {
if (!Conversions[i].isInitialized()) return false;
if (Conversions[i].isAmbiguous()) return true;
}
return false;
}
@ -684,13 +694,19 @@ namespace clang {
SmallVector<OverloadCandidate, 16> Candidates;
llvm::SmallPtrSet<Decl *, 16> Functions;
// Allocator for OverloadCandidate::Conversions. We store the first few
// elements inline to avoid allocation for small sets.
llvm::BumpPtrAllocator ConversionSequenceAllocator;
size_t InlineSpace[16*sizeof(ImplicitConversionSequence) / sizeof(size_t)];
unsigned NumInlineSequences;
SourceLocation Loc;
OverloadCandidateSet(const OverloadCandidateSet &);
OverloadCandidateSet &operator=(const OverloadCandidateSet &);
public:
OverloadCandidateSet(SourceLocation Loc) : Loc(Loc) {}
OverloadCandidateSet(SourceLocation Loc) : NumInlineSequences(0), Loc(Loc){}
SourceLocation getLocation() const { return Loc; }
@ -714,8 +730,27 @@ namespace clang {
/// to the overload set.
OverloadCandidate &addCandidate(unsigned NumConversions = 0) {
Candidates.push_back(OverloadCandidate());
Candidates.back().Conversions.resize(NumConversions);
return Candidates.back();
OverloadCandidate &C = Candidates.back();
// Assign space from the inline array if there are enough free slots
// available.
if (NumConversions + NumInlineSequences < 16) {
ImplicitConversionSequence *I =
(ImplicitConversionSequence*)InlineSpace;
C.Conversions = &I[NumInlineSequences];
NumInlineSequences += NumConversions;
} else {
// Otherwise get memory from the allocator.
C.Conversions = ConversionSequenceAllocator
.Allocate<ImplicitConversionSequence>(NumConversions);
}
// Construct the new objects.
for (unsigned i = 0; i != NumConversions; ++i)
new (&C.Conversions[i]) ImplicitConversionSequence();
C.NumConversions = NumConversions;
return C;
}
/// Find the best viable function on this overload set, if it exists.

View File

@ -543,6 +543,7 @@ OverloadCandidate::DeductionFailureInfo::getSecondArg() {
void OverloadCandidateSet::clear() {
Candidates.clear();
Functions.clear();
NumInlineSequences = 0;
}
namespace {
@ -7018,8 +7019,8 @@ isBetterOverloadCandidate(Sema &S,
// A viable function F1 is defined to be a better function than another
// viable function F2 if for all arguments i, ICSi(F1) is not a worse
// conversion sequence than ICSi(F2), and then...
unsigned NumArgs = Cand1.Conversions.size();
assert(Cand2.Conversions.size() == NumArgs && "Overload candidate mismatch");
unsigned NumArgs = Cand1.NumConversions;
assert(Cand2.NumConversions == NumArgs && "Overload candidate mismatch");
bool HasBetterConversion = false;
for (unsigned ArgIdx = StartArg; ArgIdx < NumArgs; ++ArgIdx) {
switch (CompareImplicitConversionSequences(S,
@ -7718,7 +7719,7 @@ void NoteFunctionCandidate(Sema &S, OverloadCandidate *Cand,
case ovl_fail_bad_conversion: {
unsigned I = (Cand->IgnoreObjectArgument ? 1 : 0);
for (unsigned N = Cand->Conversions.size(); I != N; ++I)
for (unsigned N = Cand->NumConversions; I != N; ++I)
if (Cand->Conversions[I].isBad())
return DiagnoseBadConversion(S, Cand, I);
@ -7770,12 +7771,12 @@ void NoteBuiltinOperatorCandidate(Sema &S,
const char *Opc,
SourceLocation OpLoc,
OverloadCandidate *Cand) {
assert(Cand->Conversions.size() <= 2 && "builtin operator is not binary");
assert(Cand->NumConversions <= 2 && "builtin operator is not binary");
std::string TypeStr("operator");
TypeStr += Opc;
TypeStr += "(";
TypeStr += Cand->BuiltinTypes.ParamTypes[0].getAsString();
if (Cand->Conversions.size() == 1) {
if (Cand->NumConversions == 1) {
TypeStr += ")";
S.Diag(OpLoc, diag::note_ovl_builtin_unary_candidate) << TypeStr;
} else {
@ -7788,7 +7789,7 @@ void NoteBuiltinOperatorCandidate(Sema &S,
void NoteAmbiguousUserConversions(Sema &S, SourceLocation OpLoc,
OverloadCandidate *Cand) {
unsigned NoOperands = Cand->Conversions.size();
unsigned NoOperands = Cand->NumConversions;
for (unsigned ArgIdx = 0; ArgIdx < NoOperands; ++ArgIdx) {
const ImplicitConversionSequence &ICS = Cand->Conversions[ArgIdx];
if (ICS.isBad()) break; // all meaningless after first invalid
@ -7892,11 +7893,11 @@ struct CompareOverloadCandidatesForDisplay {
// If there's any ordering between the defined conversions...
// FIXME: this might not be transitive.
assert(L->Conversions.size() == R->Conversions.size());
assert(L->NumConversions == R->NumConversions);
int leftBetter = 0;
unsigned I = (L->IgnoreObjectArgument || R->IgnoreObjectArgument);
for (unsigned E = L->Conversions.size(); I != E; ++I) {
for (unsigned E = L->NumConversions; I != E; ++I) {
switch (CompareImplicitConversionSequences(S,
L->Conversions[I],
R->Conversions[I])) {
@ -7959,7 +7960,7 @@ void CompleteNonViableCandidate(Sema &S, OverloadCandidate *Cand,
// Skip forward to the first bad conversion.
unsigned ConvIdx = (Cand->IgnoreObjectArgument ? 1 : 0);
unsigned ConvCount = Cand->Conversions.size();
unsigned ConvCount = Cand->NumConversions;
while (true) {
assert(ConvIdx != ConvCount && "no bad conversion in candidate");
ConvIdx++;