forked from OSchip/llvm-project
[analyzer] Use faster hashing (MD5) in CloneDetector.
This replaces the old approach of fingerprinting every AST node into a string, which avoided collisions and was simple to implement, but turned out to be extremely ineffective with respect to both performance and memory. The collisions are now dealt with in a separate pass, which no longer causes performance problems because collisions are rare. Patch by Raphael Isemann! Differential Revision: https://reviews.llvm.org/D22515 llvm-svn: 279378
This commit is contained in:
parent
8176ee9b5d
commit
5657486854
|
@ -16,6 +16,7 @@
|
||||||
#define LLVM_CLANG_AST_CLONEDETECTION_H
|
#define LLVM_CLANG_AST_CLONEDETECTION_H
|
||||||
|
|
||||||
#include "clang/Basic/SourceLocation.h"
|
#include "clang/Basic/SourceLocation.h"
|
||||||
|
#include "llvm/ADT/Hashing.h"
|
||||||
#include "llvm/ADT/StringMap.h"
|
#include "llvm/ADT/StringMap.h"
|
||||||
|
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
@ -163,11 +164,20 @@ public:
|
||||||
/// Holds the data about a StmtSequence that is needed during the search for
|
/// Holds the data about a StmtSequence that is needed during the search for
|
||||||
/// code clones.
|
/// code clones.
|
||||||
struct CloneSignature {
|
struct CloneSignature {
|
||||||
/// \brief Holds all relevant data of a StmtSequence.
|
/// \brief The hash code of the StmtSequence.
|
||||||
///
|
///
|
||||||
/// If this variable is equal for two different StmtSequences, then they can
|
/// The initial clone groups that are formed during the search for clones
|
||||||
/// be considered clones of each other.
|
/// consist only of Sequences that share the same hash code. This makes this
|
||||||
std::vector<DataPiece> Data;
|
/// value the central part of this heuristic that is needed to find clones
|
||||||
|
/// in a performant way. For this to work, the type of this variable
|
||||||
|
/// always needs to be small and fast to compare.
|
||||||
|
///
|
||||||
|
/// Also, StmtSequences that are clones of each others have to share
|
||||||
|
/// the same hash code. StmtSequences that are not clones of each other
|
||||||
|
/// shouldn't share the same hash code, but if they do, it will only
|
||||||
|
/// degrade the performance of the hash search but doesn't influence
|
||||||
|
/// the correctness of the result.
|
||||||
|
size_t Hash;
|
||||||
|
|
||||||
/// \brief The complexity of the StmtSequence.
|
/// \brief The complexity of the StmtSequence.
|
||||||
///
|
///
|
||||||
|
@ -186,14 +196,8 @@ public:
|
||||||
/// \brief Creates an empty CloneSignature without any data.
|
/// \brief Creates an empty CloneSignature without any data.
|
||||||
CloneSignature() : Complexity(1) {}
|
CloneSignature() : Complexity(1) {}
|
||||||
|
|
||||||
CloneSignature(const std::vector<unsigned> &Data, unsigned Complexity)
|
CloneSignature(llvm::hash_code Hash, unsigned Complexity)
|
||||||
: Data(Data), Complexity(Complexity) {}
|
: Hash(Hash), Complexity(Complexity) {}
|
||||||
|
|
||||||
/// \brief Adds the data from the given CloneSignature to this one.
|
|
||||||
void add(const CloneSignature &Other) {
|
|
||||||
Data.insert(Data.end(), Other.Data.begin(), Other.Data.end());
|
|
||||||
Complexity += Other.Complexity;
|
|
||||||
}
|
|
||||||
};
|
};
|
||||||
|
|
||||||
/// Holds group of StmtSequences that are clones of each other and the
|
/// Holds group of StmtSequences that are clones of each other and the
|
||||||
|
@ -201,10 +205,12 @@ public:
|
||||||
/// StmtSequences have in common.
|
/// StmtSequences have in common.
|
||||||
struct CloneGroup {
|
struct CloneGroup {
|
||||||
std::vector<StmtSequence> Sequences;
|
std::vector<StmtSequence> Sequences;
|
||||||
unsigned Complexity;
|
CloneSignature Signature;
|
||||||
|
|
||||||
CloneGroup(const StmtSequence &Seq, unsigned Complexity)
|
CloneGroup() {}
|
||||||
: Complexity(Complexity) {
|
|
||||||
|
CloneGroup(const StmtSequence &Seq, CloneSignature Signature)
|
||||||
|
: Signature(Signature) {
|
||||||
Sequences.push_back(Seq);
|
Sequences.push_back(Seq);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -269,11 +275,8 @@ public:
|
||||||
unsigned MinGroupComplexity);
|
unsigned MinGroupComplexity);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
/// Stores all found clone groups including invalid groups with only a single
|
/// Stores all encountered StmtSequences alongside their CloneSignature.
|
||||||
/// statement.
|
std::vector<std::pair<CloneSignature, StmtSequence>> Sequences;
|
||||||
std::vector<CloneGroup> CloneGroups;
|
|
||||||
/// Maps search data to its related index in the \p CloneGroups vector.
|
|
||||||
llvm::StringMap<std::size_t> CloneGroupIndexes;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
} // end namespace clang
|
} // end namespace clang
|
||||||
|
|
|
@ -19,6 +19,7 @@
|
||||||
#include "clang/AST/StmtVisitor.h"
|
#include "clang/AST/StmtVisitor.h"
|
||||||
#include "clang/Lex/Lexer.h"
|
#include "clang/Lex/Lexer.h"
|
||||||
#include "llvm/ADT/StringRef.h"
|
#include "llvm/ADT/StringRef.h"
|
||||||
|
#include "llvm/Support/MD5.h"
|
||||||
#include "llvm/Support/raw_ostream.h"
|
#include "llvm/Support/raw_ostream.h"
|
||||||
|
|
||||||
using namespace clang;
|
using namespace clang;
|
||||||
|
@ -279,47 +280,35 @@ namespace {
|
||||||
/// This class defines what a code clone is: If it collects for two statements
|
/// This class defines what a code clone is: If it collects for two statements
|
||||||
/// the same data, then those two statements are considered to be clones of each
|
/// the same data, then those two statements are considered to be clones of each
|
||||||
/// other.
|
/// other.
|
||||||
class StmtDataCollector : public ConstStmtVisitor<StmtDataCollector> {
|
///
|
||||||
|
/// All collected data is forwarded to the given data consumer of the type T.
|
||||||
|
/// The data consumer class needs to provide a member method with the signature:
|
||||||
|
/// update(StringRef Str)
|
||||||
|
template <typename T>
|
||||||
|
class StmtDataCollector : public ConstStmtVisitor<StmtDataCollector<T>> {
|
||||||
|
|
||||||
ASTContext &Context;
|
ASTContext &Context;
|
||||||
std::vector<CloneDetector::DataPiece> &CollectedData;
|
/// \brief The data sink to which all data is forwarded.
|
||||||
|
T &DataConsumer;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
/// \brief Collects data of the given Stmt.
|
/// \brief Collects data of the given Stmt.
|
||||||
/// \param S The given statement.
|
/// \param S The given statement.
|
||||||
/// \param Context The ASTContext of S.
|
/// \param Context The ASTContext of S.
|
||||||
/// \param D The given data vector to which all collected data is appended.
|
/// \param D The data sink to which all data is forwarded.
|
||||||
StmtDataCollector(const Stmt *S, ASTContext &Context,
|
StmtDataCollector(const Stmt *S, ASTContext &Context, T &DataConsumer)
|
||||||
std::vector<CloneDetector::DataPiece> &D)
|
: Context(Context), DataConsumer(DataConsumer) {
|
||||||
: Context(Context), CollectedData(D) {
|
this->Visit(S);
|
||||||
Visit(S);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Below are utility methods for appending different data to the vector.
|
// Below are utility methods for appending different data to the vector.
|
||||||
|
|
||||||
void addData(CloneDetector::DataPiece Integer) {
|
void addData(CloneDetector::DataPiece Integer) {
|
||||||
CollectedData.push_back(Integer);
|
DataConsumer.update(
|
||||||
|
StringRef(reinterpret_cast<char *>(&Integer), sizeof(Integer)));
|
||||||
}
|
}
|
||||||
|
|
||||||
// FIXME: The functions below add long strings to the data vector which are
|
void addData(llvm::StringRef Str) { DataConsumer.update(Str); }
|
||||||
// probably not good for performance. Replace the strings with pointer values
|
|
||||||
// or a some other unique integer.
|
|
||||||
|
|
||||||
void addData(llvm::StringRef Str) {
|
|
||||||
if (Str.empty())
|
|
||||||
return;
|
|
||||||
|
|
||||||
const size_t OldSize = CollectedData.size();
|
|
||||||
|
|
||||||
const size_t PieceSize = sizeof(CloneDetector::DataPiece);
|
|
||||||
// Calculate how many vector units we need to accomodate all string bytes.
|
|
||||||
size_t RoundedUpPieceNumber = (Str.size() + PieceSize - 1) / PieceSize;
|
|
||||||
// Allocate space for the string in the data vector.
|
|
||||||
CollectedData.resize(CollectedData.size() + RoundedUpPieceNumber);
|
|
||||||
|
|
||||||
// Copy the string to the allocated space at the end of the vector.
|
|
||||||
std::memcpy(CollectedData.data() + OldSize, Str.data(), Str.size());
|
|
||||||
}
|
|
||||||
|
|
||||||
void addData(const QualType &QT) { addData(QT.getAsString()); }
|
void addData(const QualType &QT) { addData(QT.getAsString()); }
|
||||||
|
|
||||||
|
@ -484,8 +473,10 @@ class CloneSignatureGenerator {
|
||||||
// Create an empty signature that will be filled in this method.
|
// Create an empty signature that will be filled in this method.
|
||||||
CloneDetector::CloneSignature Signature;
|
CloneDetector::CloneSignature Signature;
|
||||||
|
|
||||||
// Collect all relevant data from S and put it into the empty signature.
|
llvm::MD5 Hash;
|
||||||
StmtDataCollector(S, Context, Signature.Data);
|
|
||||||
|
// Collect all relevant data from S and hash it.
|
||||||
|
StmtDataCollector<llvm::MD5>(S, Context, Hash);
|
||||||
|
|
||||||
// Look up what macros expanded into the current statement.
|
// Look up what macros expanded into the current statement.
|
||||||
std::string StartMacroStack = getMacroStack(S->getLocStart(), Context);
|
std::string StartMacroStack = getMacroStack(S->getLocStart(), Context);
|
||||||
|
@ -523,7 +514,9 @@ class CloneSignatureGenerator {
|
||||||
auto ChildSignature = generateSignatures(Child, StartMacroStack);
|
auto ChildSignature = generateSignatures(Child, StartMacroStack);
|
||||||
|
|
||||||
// Add the collected data to the signature of the current statement.
|
// Add the collected data to the signature of the current statement.
|
||||||
Signature.add(ChildSignature);
|
Signature.Complexity += ChildSignature.Complexity;
|
||||||
|
Hash.update(StringRef(reinterpret_cast<char *>(&ChildSignature.Hash),
|
||||||
|
sizeof(ChildSignature.Hash)));
|
||||||
|
|
||||||
// If the current statement is a CompoundStatement, we need to store the
|
// If the current statement is a CompoundStatement, we need to store the
|
||||||
// signature for the generation of the sub-sequences.
|
// signature for the generation of the sub-sequences.
|
||||||
|
@ -536,6 +529,14 @@ class CloneSignatureGenerator {
|
||||||
if (CS)
|
if (CS)
|
||||||
handleSubSequences(CS, ChildSignatures);
|
handleSubSequences(CS, ChildSignatures);
|
||||||
|
|
||||||
|
// Create the final hash code for the current signature.
|
||||||
|
llvm::MD5::MD5Result HashResult;
|
||||||
|
Hash.final(HashResult);
|
||||||
|
|
||||||
|
// Copy as much of the generated hash code to the signature's hash code.
|
||||||
|
std::memcpy(&Signature.Hash, &HashResult,
|
||||||
|
std::min(sizeof(Signature.Hash), sizeof(HashResult)));
|
||||||
|
|
||||||
// Save the signature for the current statement in the CloneDetector object.
|
// Save the signature for the current statement in the CloneDetector object.
|
||||||
CD.add(StmtSequence(S, Context), Signature);
|
CD.add(StmtSequence(S, Context), Signature);
|
||||||
|
|
||||||
|
@ -564,11 +565,24 @@ class CloneSignatureGenerator {
|
||||||
// Create an empty signature and add the signatures of all selected
|
// Create an empty signature and add the signatures of all selected
|
||||||
// child statements to it.
|
// child statements to it.
|
||||||
CloneDetector::CloneSignature SubSignature;
|
CloneDetector::CloneSignature SubSignature;
|
||||||
|
llvm::MD5 SubHash;
|
||||||
|
|
||||||
for (unsigned i = Pos; i < Pos + Length; ++i) {
|
for (unsigned i = Pos; i < Pos + Length; ++i) {
|
||||||
SubSignature.add(ChildSignatures[i]);
|
SubSignature.Complexity += ChildSignatures[i].Complexity;
|
||||||
|
size_t ChildHash = ChildSignatures[i].Hash;
|
||||||
|
|
||||||
|
SubHash.update(StringRef(reinterpret_cast<char *>(&ChildHash),
|
||||||
|
sizeof(ChildHash)));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Create the final hash code for the current signature.
|
||||||
|
llvm::MD5::MD5Result HashResult;
|
||||||
|
SubHash.final(HashResult);
|
||||||
|
|
||||||
|
// Copy as much of the generated hash code to the signature's hash code.
|
||||||
|
std::memcpy(&SubSignature.Hash, &HashResult,
|
||||||
|
std::min(sizeof(SubSignature.Hash), sizeof(HashResult)));
|
||||||
|
|
||||||
// Save the signature together with the information about what children
|
// Save the signature together with the information about what children
|
||||||
// sequence we selected.
|
// sequence we selected.
|
||||||
CD.add(StmtSequence(CS, Context, Pos, Pos + Length), SubSignature);
|
CD.add(StmtSequence(CS, Context, Pos, Pos + Length), SubSignature);
|
||||||
|
@ -594,25 +608,7 @@ void CloneDetector::analyzeCodeBody(const Decl *D) {
|
||||||
|
|
||||||
void CloneDetector::add(const StmtSequence &S,
|
void CloneDetector::add(const StmtSequence &S,
|
||||||
const CloneSignature &Signature) {
|
const CloneSignature &Signature) {
|
||||||
// StringMap only works with StringRefs, so we create one for our data vector.
|
Sequences.push_back(std::make_pair(Signature, S));
|
||||||
auto &Data = Signature.Data;
|
|
||||||
StringRef DataRef = StringRef(reinterpret_cast<const char *>(Data.data()),
|
|
||||||
Data.size() * sizeof(unsigned));
|
|
||||||
|
|
||||||
// Search with the help of the signature if we already have encountered a
|
|
||||||
// clone of the given StmtSequence.
|
|
||||||
auto I = CloneGroupIndexes.find(DataRef);
|
|
||||||
if (I == CloneGroupIndexes.end()) {
|
|
||||||
// We haven't found an existing clone group, so we create a new clone group
|
|
||||||
// for this StmtSequence and store the index of it in our search map.
|
|
||||||
CloneGroupIndexes[DataRef] = CloneGroups.size();
|
|
||||||
CloneGroups.emplace_back(S, Signature.Complexity);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
// We have found an existing clone group and can expand it with the given
|
|
||||||
// StmtSequence.
|
|
||||||
CloneGroups[I->getValue()].Sequences.push_back(S);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
@ -645,14 +641,66 @@ bool containsGroup(CloneDetector::CloneGroup &Group,
|
||||||
}
|
}
|
||||||
} // end anonymous namespace
|
} // end anonymous namespace
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
/// \brief Wrapper around FoldingSetNodeID that it can be used as the template
|
||||||
|
/// argument of the StmtDataCollector.
|
||||||
|
class FoldingSetNodeIDWrapper {
|
||||||
|
|
||||||
|
llvm::FoldingSetNodeID &FS;
|
||||||
|
|
||||||
|
public:
|
||||||
|
FoldingSetNodeIDWrapper(llvm::FoldingSetNodeID &FS) : FS(FS) {}
|
||||||
|
|
||||||
|
void update(StringRef Str) { FS.AddString(Str); }
|
||||||
|
};
|
||||||
|
} // end anonymous namespace
|
||||||
|
|
||||||
|
/// \brief Writes the relevant data from all statements and child statements
|
||||||
|
/// in the given StmtSequence into the given FoldingSetNodeID.
|
||||||
|
static void CollectStmtSequenceData(const StmtSequence &Sequence,
|
||||||
|
FoldingSetNodeIDWrapper &OutputData) {
|
||||||
|
for (const Stmt *S : Sequence) {
|
||||||
|
StmtDataCollector<FoldingSetNodeIDWrapper>(S, Sequence.getASTContext(),
|
||||||
|
OutputData);
|
||||||
|
|
||||||
|
for (const Stmt *Child : S->children()) {
|
||||||
|
if (!Child)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
CollectStmtSequenceData(StmtSequence(Child, Sequence.getASTContext()),
|
||||||
|
OutputData);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// \brief Returns true if both sequences are clones of each other.
|
||||||
|
static bool areSequencesClones(const StmtSequence &LHS,
|
||||||
|
const StmtSequence &RHS) {
|
||||||
|
// We collect the data from all statements in the sequence as we did before
|
||||||
|
// when generating a hash value for each sequence. But this time we don't
|
||||||
|
// hash the collected data and compare the whole data set instead. This
|
||||||
|
// prevents any false-positives due to hash code collisions.
|
||||||
|
llvm::FoldingSetNodeID DataLHS, DataRHS;
|
||||||
|
FoldingSetNodeIDWrapper LHSWrapper(DataLHS);
|
||||||
|
FoldingSetNodeIDWrapper RHSWrapper(DataRHS);
|
||||||
|
|
||||||
|
CollectStmtSequenceData(LHS, LHSWrapper);
|
||||||
|
CollectStmtSequenceData(RHS, RHSWrapper);
|
||||||
|
|
||||||
|
return DataLHS == DataRHS;
|
||||||
|
}
|
||||||
|
|
||||||
/// \brief Finds all actual clone groups in a single group of presumed clones.
|
/// \brief Finds all actual clone groups in a single group of presumed clones.
|
||||||
/// \param Result Output parameter to which all found groups are added. Every
|
/// \param Result Output parameter to which all found groups are added.
|
||||||
/// clone in a group that was added this way follows the same
|
/// \param Group A group of presumed clones. The clones are allowed to have a
|
||||||
/// variable pattern as the other clones in its group.
|
/// different variable pattern and may not be actual clones of each
|
||||||
/// \param Group A group of clones. The clones are allowed to have a different
|
/// other.
|
||||||
/// variable pattern.
|
/// \param CheckVariablePatterns If true, every clone in a group that was added
|
||||||
|
/// to the output follows the same variable pattern as the other
|
||||||
|
/// clones in its group.
|
||||||
static void createCloneGroups(std::vector<CloneDetector::CloneGroup> &Result,
|
static void createCloneGroups(std::vector<CloneDetector::CloneGroup> &Result,
|
||||||
const CloneDetector::CloneGroup &Group) {
|
const CloneDetector::CloneGroup &Group,
|
||||||
|
bool CheckVariablePattern) {
|
||||||
// We remove the Sequences one by one, so a list is more appropriate.
|
// We remove the Sequences one by one, so a list is more appropriate.
|
||||||
std::list<StmtSequence> UnassignedSequences(Group.Sequences.begin(),
|
std::list<StmtSequence> UnassignedSequences(Group.Sequences.begin(),
|
||||||
Group.Sequences.end());
|
Group.Sequences.end());
|
||||||
|
@ -664,7 +712,7 @@ static void createCloneGroups(std::vector<CloneDetector::CloneGroup> &Result,
|
||||||
StmtSequence Prototype = UnassignedSequences.front();
|
StmtSequence Prototype = UnassignedSequences.front();
|
||||||
UnassignedSequences.pop_front();
|
UnassignedSequences.pop_front();
|
||||||
|
|
||||||
CloneDetector::CloneGroup FilteredGroup(Prototype, Group.Complexity);
|
CloneDetector::CloneGroup FilteredGroup(Prototype, Group.Signature);
|
||||||
|
|
||||||
// Analyze the variable pattern of the prototype. Every other StmtSequence
|
// Analyze the variable pattern of the prototype. Every other StmtSequence
|
||||||
// needs to have the same pattern to get into the new clone group.
|
// needs to have the same pattern to get into the new clone group.
|
||||||
|
@ -674,12 +722,27 @@ static void createCloneGroups(std::vector<CloneDetector::CloneGroup> &Result,
|
||||||
// and assign them to our new clone group.
|
// and assign them to our new clone group.
|
||||||
auto I = UnassignedSequences.begin(), E = UnassignedSequences.end();
|
auto I = UnassignedSequences.begin(), E = UnassignedSequences.end();
|
||||||
while (I != E) {
|
while (I != E) {
|
||||||
|
// If the sequence doesn't fit to the prototype, we have encountered
|
||||||
|
// an unintended hash code collision and we skip it.
|
||||||
|
if (!areSequencesClones(Prototype, *I)) {
|
||||||
|
++I;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
if (VariablePattern(*I).countPatternDifferences(PrototypeFeatures) == 0) {
|
// If we weren't asked to check for a matching variable pattern in clone
|
||||||
|
// groups we can add the sequence now to the new clone group.
|
||||||
|
// If we were asked to check for matching variable pattern, we first have
|
||||||
|
// to check that there are no differences between the two patterns and
|
||||||
|
// only proceed if they match.
|
||||||
|
if (!CheckVariablePattern ||
|
||||||
|
VariablePattern(*I).countPatternDifferences(PrototypeFeatures) == 0) {
|
||||||
FilteredGroup.Sequences.push_back(*I);
|
FilteredGroup.Sequences.push_back(*I);
|
||||||
I = UnassignedSequences.erase(I);
|
I = UnassignedSequences.erase(I);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// We didn't found a matching variable pattern, so we continue with the
|
||||||
|
// next sequence.
|
||||||
++I;
|
++I;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -694,14 +757,76 @@ static void createCloneGroups(std::vector<CloneDetector::CloneGroup> &Result,
|
||||||
void CloneDetector::findClones(std::vector<CloneGroup> &Result,
|
void CloneDetector::findClones(std::vector<CloneGroup> &Result,
|
||||||
unsigned MinGroupComplexity,
|
unsigned MinGroupComplexity,
|
||||||
bool CheckPatterns) {
|
bool CheckPatterns) {
|
||||||
|
// A shortcut (and necessary for the for-loop later in this function).
|
||||||
|
if (Sequences.empty())
|
||||||
|
return;
|
||||||
|
|
||||||
|
// We need to search for groups of StmtSequences with the same hash code to
|
||||||
|
// create our initial clone groups. By sorting all known StmtSequences by
|
||||||
|
// their hash value we make sure that StmtSequences with the same hash code
|
||||||
|
// are grouped together in the Sequences vector.
|
||||||
|
// Note: We stable sort here because the StmtSequences are added in the order
|
||||||
|
// in which they appear in the source file. We want to preserve that order
|
||||||
|
// because we also want to report them in that order in the CloneChecker.
|
||||||
|
std::stable_sort(Sequences.begin(), Sequences.end(),
|
||||||
|
[](std::pair<CloneSignature, StmtSequence> LHS,
|
||||||
|
std::pair<CloneSignature, StmtSequence> RHS) {
|
||||||
|
return LHS.first.Hash < RHS.first.Hash;
|
||||||
|
});
|
||||||
|
|
||||||
|
std::vector<CloneGroup> CloneGroups;
|
||||||
|
|
||||||
|
// Check for each CloneSignature if its successor has the same hash value.
|
||||||
|
// We don't check the last CloneSignature as it has no successor.
|
||||||
|
// Note: The 'size - 1' in the condition is safe because we check for an empty
|
||||||
|
// Sequences vector at the beginning of this function.
|
||||||
|
for (unsigned i = 0; i < Sequences.size() - 1; ++i) {
|
||||||
|
const auto Current = Sequences[i];
|
||||||
|
const auto Next = Sequences[i + 1];
|
||||||
|
|
||||||
|
if (Current.first.Hash != Next.first.Hash)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
// It's likely that we just found an sequence of CloneSignatures that
|
||||||
|
// represent a CloneGroup, so we create a new group and start checking and
|
||||||
|
// adding the CloneSignatures in this sequence.
|
||||||
|
CloneGroup Group;
|
||||||
|
Group.Signature = Current.first;
|
||||||
|
|
||||||
|
for (; i < Sequences.size(); ++i) {
|
||||||
|
const auto &Signature = Sequences[i];
|
||||||
|
|
||||||
|
// A different hash value means we have reached the end of the sequence.
|
||||||
|
if (Current.first.Hash != Signature.first.Hash) {
|
||||||
|
// The current Signature could be the start of a new CloneGroup. So we
|
||||||
|
// decrement i so that we visit it again in the outer loop.
|
||||||
|
// Note: i can never be 0 at this point because we are just comparing
|
||||||
|
// the hash of the Current CloneSignature with itself in the 'if' above.
|
||||||
|
assert(i != 0);
|
||||||
|
--i;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Skip CloneSignatures that won't pass the complexity requirement.
|
||||||
|
if (Signature.first.Complexity < MinGroupComplexity)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
Group.Sequences.push_back(Signature.second);
|
||||||
|
}
|
||||||
|
|
||||||
|
// There is a chance that we haven't found more than two fitting
|
||||||
|
// CloneSignature because not enough CloneSignatures passed the complexity
|
||||||
|
// requirement. As a CloneGroup with less than two members makes no sense,
|
||||||
|
// we ignore this CloneGroup and won't add it to the result.
|
||||||
|
if (!Group.isValid())
|
||||||
|
continue;
|
||||||
|
|
||||||
|
CloneGroups.push_back(Group);
|
||||||
|
}
|
||||||
|
|
||||||
// Add every valid clone group that fulfills the complexity requirement.
|
// Add every valid clone group that fulfills the complexity requirement.
|
||||||
for (const CloneGroup &Group : CloneGroups) {
|
for (const CloneGroup &Group : CloneGroups) {
|
||||||
if (Group.isValid() && Group.Complexity >= MinGroupComplexity) {
|
createCloneGroups(Result, Group, CheckPatterns);
|
||||||
if (CheckPatterns)
|
|
||||||
createCloneGroups(Result, Group);
|
|
||||||
else
|
|
||||||
Result.push_back(Group);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<unsigned> IndexesToRemove;
|
std::vector<unsigned> IndexesToRemove;
|
||||||
|
|
|
@ -91,15 +91,6 @@ void CloneChecker::reportClones(SourceManager &SM, AnalysisManager &Mgr,
|
||||||
"Related code clone is here.");
|
"Related code clone is here.");
|
||||||
|
|
||||||
for (CloneDetector::CloneGroup &Group : CloneGroups) {
|
for (CloneDetector::CloneGroup &Group : CloneGroups) {
|
||||||
// For readability reasons we sort the clones by line numbers.
|
|
||||||
std::sort(Group.Sequences.begin(), Group.Sequences.end(),
|
|
||||||
[&SM](const StmtSequence &LHS, const StmtSequence &RHS) {
|
|
||||||
return SM.isBeforeInTranslationUnit(LHS.getStartLoc(),
|
|
||||||
RHS.getStartLoc()) &&
|
|
||||||
SM.isBeforeInTranslationUnit(LHS.getEndLoc(),
|
|
||||||
RHS.getEndLoc());
|
|
||||||
});
|
|
||||||
|
|
||||||
// We group the clones by printing the first as a warning and all others
|
// We group the clones by printing the first as a warning and all others
|
||||||
// as a note.
|
// as a note.
|
||||||
DiagEngine.Report(Group.Sequences.front().getStartLoc(), WarnID);
|
DiagEngine.Report(Group.Sequences.front().getStartLoc(), WarnID);
|
||||||
|
|
Loading…
Reference in New Issue