forked from OSchip/llvm-project
[analyzer] Add basic capabilities to detect source code clones.
This patch adds the CloneDetector class which allows searching source code for clones. For every statement or group of statements within a compound statement, CloneDetector computes a hash value, and finds clones by detecting identical hash values. This initial patch only provides a simple hashing mechanism that hashes the kind of each sub-statement. This patch also adds CloneChecker - a simple static analyzer checker that uses CloneDetector to report copy-pasted code. Patch by Raphael Isemann! Differential Revision: https://reviews.llvm.org/D20795 llvm-svn: 276782
This commit is contained in:
parent
f8df4d1117
commit
ba816326f3
|
@ -0,0 +1,235 @@
|
|||
//===--- CloneDetection.h - Finds code clones in an AST ---------*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
///
|
||||
/// /file
|
||||
/// This file defines classes for searching and anlyzing source code clones.
|
||||
///
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef LLVM_CLANG_AST_CLONEDETECTION_H
|
||||
#define LLVM_CLANG_AST_CLONEDETECTION_H
|
||||
|
||||
#include "clang/Basic/SourceLocation.h"
|
||||
#include "llvm/ADT/StringMap.h"
|
||||
|
||||
#include <vector>
|
||||
|
||||
namespace clang {
|
||||
|
||||
class Stmt;
|
||||
class Decl;
|
||||
class ASTContext;
|
||||
class CompoundStmt;
|
||||
|
||||
/// \brief Identifies a list of statements.
|
||||
///
|
||||
/// Can either identify a single arbitrary Stmt object, a continuous sequence of
|
||||
/// child statements inside a CompoundStmt or no statements at all.
|
||||
class StmtSequence {
|
||||
/// If this object identifies a sequence of statements inside a CompoundStmt,
|
||||
/// S points to this CompoundStmt. If this object only identifies a single
|
||||
/// Stmt, then S is a pointer to this Stmt.
|
||||
const Stmt *S;
|
||||
|
||||
/// The related ASTContext for S.
|
||||
ASTContext *Context;
|
||||
|
||||
/// If EndIndex is non-zero, then S is a CompoundStmt and this StmtSequence
|
||||
/// instance is representing the CompoundStmt children inside the array
|
||||
/// [StartIndex, EndIndex).
|
||||
unsigned StartIndex;
|
||||
unsigned EndIndex;
|
||||
|
||||
public:
|
||||
/// \brief Constructs a StmtSequence holding multiple statements.
|
||||
///
|
||||
/// The resulting StmtSequence identifies a continuous sequence of statements
|
||||
/// in the body of the given CompoundStmt. Which statements of the body should
|
||||
/// be identified needs to be specified by providing a start and end index
|
||||
/// that describe a non-empty sub-array in the body of the given CompoundStmt.
|
||||
///
|
||||
/// \param Stmt A CompoundStmt that contains all statements in its body.
|
||||
/// \param Context The ASTContext for the given CompoundStmt.
|
||||
/// \param StartIndex The inclusive start index in the children array of
|
||||
/// \p Stmt
|
||||
/// \param EndIndex The exclusive end index in the children array of \p Stmt.
|
||||
StmtSequence(const CompoundStmt *Stmt, ASTContext &Context,
|
||||
unsigned StartIndex, unsigned EndIndex);
|
||||
|
||||
/// \brief Constructs a StmtSequence holding a single statement.
|
||||
///
|
||||
/// \param Stmt An arbitrary Stmt.
|
||||
/// \param Context The ASTContext for the given Stmt.
|
||||
StmtSequence(const Stmt *Stmt, ASTContext &Context);
|
||||
|
||||
/// \brief Constructs an empty StmtSequence.
|
||||
StmtSequence();
|
||||
|
||||
typedef const Stmt *const *iterator;
|
||||
|
||||
/// Returns an iterator pointing to the first statement in this sequence.
|
||||
iterator begin() const;
|
||||
|
||||
/// Returns an iterator pointing behind the last statement in this sequence.
|
||||
iterator end() const;
|
||||
|
||||
/// Returns the first statement in this sequence.
|
||||
///
|
||||
/// This method should only be called on a non-empty StmtSequence object.
|
||||
const Stmt *front() const {
|
||||
assert(!empty());
|
||||
return begin()[0];
|
||||
}
|
||||
|
||||
/// Returns the last statement in this sequence.
|
||||
///
|
||||
/// This method should only be called on a non-empty StmtSequence object.
|
||||
const Stmt *back() const {
|
||||
assert(!empty());
|
||||
return begin()[size() - 1];
|
||||
}
|
||||
|
||||
/// Returns the number of statements this object holds.
|
||||
unsigned size() const {
|
||||
if (holdsSequence())
|
||||
return EndIndex - StartIndex;
|
||||
if (S == nullptr)
|
||||
return 0;
|
||||
return 1;
|
||||
}
|
||||
|
||||
/// Returns true if and only if this StmtSequence contains no statements.
|
||||
bool empty() const { return size() == 0; }
|
||||
|
||||
/// Returns the related ASTContext for the stored Stmts.
|
||||
ASTContext &getASTContext() const {
|
||||
assert(Context);
|
||||
return *Context;
|
||||
}
|
||||
|
||||
/// Returns true if this objects holds a list of statements.
|
||||
bool holdsSequence() const { return EndIndex != 0; }
|
||||
|
||||
/// Returns the start sourcelocation of the first statement in this sequence.
|
||||
///
|
||||
/// This method should only be called on a non-empty StmtSequence object.
|
||||
SourceLocation getStartLoc() const;
|
||||
|
||||
/// Returns the end sourcelocation of the last statement in this sequence.
|
||||
///
|
||||
/// This method should only be called on a non-empty StmtSequence object.
|
||||
SourceLocation getEndLoc() const;
|
||||
|
||||
bool operator==(const StmtSequence &Other) const {
|
||||
return std::tie(S, StartIndex, EndIndex) ==
|
||||
std::tie(Other.S, Other.StartIndex, Other.EndIndex);
|
||||
}
|
||||
|
||||
bool operator!=(const StmtSequence &Other) const {
|
||||
return std::tie(S, StartIndex, EndIndex) !=
|
||||
std::tie(Other.S, Other.StartIndex, Other.EndIndex);
|
||||
}
|
||||
|
||||
/// Returns true if and only if this sequence covers a source range that
|
||||
/// contains the source range of the given sequence \p Other.
|
||||
///
|
||||
/// This method should only be called on a non-empty StmtSequence object
|
||||
/// and passed a non-empty StmtSequence object.
|
||||
bool contains(const StmtSequence &Other) const;
|
||||
};
|
||||
|
||||
/// \brief Searches for clones in source code.
|
||||
///
|
||||
/// First, this class needs a translation unit which is passed via
|
||||
/// \p analyzeTranslationUnit . It will then generate and store search data
|
||||
/// for all statements inside the given translation unit.
|
||||
/// Afterwards the generated data can be used to find code clones by calling
|
||||
/// \p findClones .
|
||||
///
|
||||
/// This class only searches for clones in exectuable source code
|
||||
/// (e.g. function bodies). Other clones (e.g. cloned comments or declarations)
|
||||
/// are not supported.
|
||||
class CloneDetector {
|
||||
public:
|
||||
/// Holds the data about a StmtSequence that is needed during the search for
|
||||
/// code clones.
|
||||
struct CloneSignature {
|
||||
/// \brief Holds all relevant data of a StmtSequence.
|
||||
///
|
||||
/// If this variable is equal for two different StmtSequences, then they can
|
||||
/// be considered clones of each other.
|
||||
std::vector<unsigned> Data;
|
||||
|
||||
/// \brief The complexity of the StmtSequence.
|
||||
///
|
||||
/// This scalar value serves as a simple way of filtering clones that are
|
||||
/// too small to be reported. A greater value indicates that the related
|
||||
/// StmtSequence is probably more interesting to the user.
|
||||
unsigned Complexity;
|
||||
|
||||
/// \brief Creates an empty CloneSignature without any data.
|
||||
CloneSignature() : Complexity(1) {}
|
||||
|
||||
CloneSignature(const std::vector<unsigned> &Data, unsigned Complexity)
|
||||
: Data(Data), Complexity(Complexity) {}
|
||||
|
||||
/// \brief Adds the data from the given CloneSignature to this one.
|
||||
void add(const CloneSignature &Other) {
|
||||
Data.insert(Data.end(), Other.Data.begin(), Other.Data.end());
|
||||
Complexity += Other.Complexity;
|
||||
}
|
||||
};
|
||||
|
||||
/// Holds group of StmtSequences that are clones of each other and the
|
||||
/// complexity value (see CloneSignature::Complexity) that all stored
|
||||
/// StmtSequences have in common.
|
||||
struct CloneGroup {
|
||||
std::vector<StmtSequence> Sequences;
|
||||
unsigned Complexity;
|
||||
|
||||
CloneGroup(const StmtSequence &Seq, unsigned Complexity)
|
||||
: Complexity(Complexity) {
|
||||
Sequences.push_back(Seq);
|
||||
}
|
||||
|
||||
/// \brief Returns false if and only if this group should be skipped when
|
||||
/// searching for clones.
|
||||
bool isValid() const {
|
||||
// A clone group with only one member makes no sense, so we skip them.
|
||||
return Sequences.size() > 1;
|
||||
}
|
||||
};
|
||||
|
||||
/// \brief Generates and stores search data for all statements in the body of
|
||||
/// the given Decl.
|
||||
void analyzeCodeBody(const Decl *D);
|
||||
|
||||
/// \brief Stores the CloneSignature to allow future querying.
|
||||
void add(const StmtSequence &S, const CloneSignature &Signature);
|
||||
|
||||
/// \brief Searches the provided statements for clones.
|
||||
///
|
||||
/// \param Result Output parameter that is filled with a list of found
|
||||
/// clone groups. Each group contains multiple StmtSequences
|
||||
/// that were identified to be clones of each other.
|
||||
/// \param MinGroupComplexity Only return clones which have at least this
|
||||
/// complexity value.
|
||||
void findClones(std::vector<CloneGroup> &Result, unsigned MinGroupComplexity);
|
||||
|
||||
private:
|
||||
/// Stores all found clone groups including invalid groups with only a single
|
||||
/// statement.
|
||||
std::vector<CloneGroup> CloneGroups;
|
||||
/// Maps search data to its related index in the \p CloneGroups vector.
|
||||
llvm::StringMap<std::size_t> CloneGroupIndexes;
|
||||
};
|
||||
|
||||
} // end namespace clang
|
||||
|
||||
#endif // LLVM_CLANG_AST_CLONEDETECTION_H
|
|
@ -77,6 +77,8 @@ def MPI : Package<"mpi">, InPackage<OptIn>;
|
|||
def LLVM : Package<"llvm">;
|
||||
def Debug : Package<"debug">;
|
||||
|
||||
def CloneDetectionAlpha : Package<"clone">, InPackage<Alpha>, Hidden;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Core Checkers.
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -661,3 +663,17 @@ def BugHashDumper : Checker<"DumpBugHash">,
|
|||
DescFile<"DebugCheckers.cpp">;
|
||||
|
||||
} // end "debug"
|
||||
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Clone Detection
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
let ParentPackage = CloneDetectionAlpha in {
|
||||
|
||||
def CloneChecker : Checker<"CloneChecker">,
|
||||
HelpText<"Reports similar pieces of code.">,
|
||||
DescFile<"CloneChecker.cpp">;
|
||||
|
||||
} // end "clone"
|
||||
|
||||
|
|
|
@ -9,6 +9,7 @@ add_clang_library(clangAnalysis
|
|||
CFGReachabilityAnalysis.cpp
|
||||
CFGStmtMap.cpp
|
||||
CallGraph.cpp
|
||||
CloneDetection.cpp
|
||||
CocoaConventions.cpp
|
||||
Consumed.cpp
|
||||
CodeInjector.cpp
|
||||
|
|
|
@ -0,0 +1,277 @@
|
|||
//===--- CloneDetection.cpp - Finds code clones in an AST -------*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
///
|
||||
/// This file implements classes for searching and anlyzing source code clones.
|
||||
///
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "clang/Analysis/CloneDetection.h"
|
||||
|
||||
#include "clang/AST/ASTContext.h"
|
||||
#include "clang/AST/RecursiveASTVisitor.h"
|
||||
#include "clang/AST/Stmt.h"
|
||||
#include "llvm/ADT/StringRef.h"
|
||||
|
||||
using namespace clang;
|
||||
|
||||
StmtSequence::StmtSequence(const CompoundStmt *Stmt, ASTContext &Context,
|
||||
unsigned StartIndex, unsigned EndIndex)
|
||||
: S(Stmt), Context(&Context), StartIndex(StartIndex), EndIndex(EndIndex) {
|
||||
assert(Stmt && "Stmt must not be a nullptr");
|
||||
assert(StartIndex < EndIndex && "Given array should not be empty");
|
||||
assert(EndIndex <= Stmt->size() && "Given array too big for this Stmt");
|
||||
}
|
||||
|
||||
StmtSequence::StmtSequence(const Stmt *Stmt, ASTContext &Context)
|
||||
: S(Stmt), Context(&Context), StartIndex(0), EndIndex(0) {}
|
||||
|
||||
StmtSequence::StmtSequence()
|
||||
: S(nullptr), Context(nullptr), StartIndex(0), EndIndex(0) {}
|
||||
|
||||
bool StmtSequence::contains(const StmtSequence &Other) const {
|
||||
// If both sequences reside in different translation units, they can never
|
||||
// contain each other.
|
||||
if (Context != Other.Context)
|
||||
return false;
|
||||
|
||||
const SourceManager &SM = Context->getSourceManager();
|
||||
|
||||
// Otherwise check if the start and end locations of the current sequence
|
||||
// surround the other sequence.
|
||||
bool StartIsInBounds =
|
||||
SM.isBeforeInTranslationUnit(getStartLoc(), Other.getStartLoc()) ||
|
||||
getStartLoc() == Other.getStartLoc();
|
||||
if (!StartIsInBounds)
|
||||
return false;
|
||||
|
||||
bool EndIsInBounds =
|
||||
SM.isBeforeInTranslationUnit(Other.getEndLoc(), getEndLoc()) ||
|
||||
Other.getEndLoc() == getEndLoc();
|
||||
return EndIsInBounds;
|
||||
}
|
||||
|
||||
StmtSequence::iterator StmtSequence::begin() const {
|
||||
if (!holdsSequence()) {
|
||||
return &S;
|
||||
}
|
||||
auto CS = cast<CompoundStmt>(S);
|
||||
return CS->body_begin() + StartIndex;
|
||||
}
|
||||
|
||||
StmtSequence::iterator StmtSequence::end() const {
|
||||
if (!holdsSequence()) {
|
||||
return &S + 1;
|
||||
}
|
||||
auto CS = cast<CompoundStmt>(S);
|
||||
return CS->body_begin() + EndIndex;
|
||||
}
|
||||
|
||||
SourceLocation StmtSequence::getStartLoc() const {
|
||||
return front()->getLocStart();
|
||||
}
|
||||
|
||||
SourceLocation StmtSequence::getEndLoc() const { return back()->getLocEnd(); }
|
||||
|
||||
namespace {
|
||||
/// Generates CloneSignatures for a set of statements and stores the results in
|
||||
/// a CloneDetector object.
|
||||
class CloneSignatureGenerator {
|
||||
|
||||
CloneDetector &CD;
|
||||
ASTContext &Context;
|
||||
|
||||
/// \brief Generates CloneSignatures for all statements in the given statement
|
||||
/// tree and stores them in the CloneDetector.
|
||||
///
|
||||
/// \param S The root of the given statement tree.
|
||||
/// \return The CloneSignature of the root statement.
|
||||
CloneDetector::CloneSignature generateSignatures(const Stmt *S) {
|
||||
// Create an empty signature that will be filled in this method.
|
||||
CloneDetector::CloneSignature Signature;
|
||||
|
||||
// The only relevant data for now is the class of the statement.
|
||||
// TODO: Collect statement class specific data.
|
||||
Signature.Data.push_back(S->getStmtClass());
|
||||
|
||||
// Storage for the signatures of the direct child statements. This is only
|
||||
// needed if the current statement is a CompoundStmt.
|
||||
std::vector<CloneDetector::CloneSignature> ChildSignatures;
|
||||
const CompoundStmt *CS = dyn_cast<const CompoundStmt>(S);
|
||||
|
||||
// The signature of a statement includes the signatures of its children.
|
||||
// Therefore we create the signatures for every child and add them to the
|
||||
// current signature.
|
||||
for (const Stmt *Child : S->children()) {
|
||||
// Some statements like 'if' can have nullptr children that we will skip.
|
||||
if (!Child)
|
||||
continue;
|
||||
|
||||
// Recursive call to create the signature of the child statement. This
|
||||
// will also create and store all clone groups in this child statement.
|
||||
auto ChildSignature = generateSignatures(Child);
|
||||
|
||||
// Add the collected data to the signature of the current statement.
|
||||
Signature.add(ChildSignature);
|
||||
|
||||
// If the current statement is a CompoundStatement, we need to store the
|
||||
// signature for the generation of the sub-sequences.
|
||||
if (CS)
|
||||
ChildSignatures.push_back(ChildSignature);
|
||||
}
|
||||
|
||||
// If the current statement is a CompoundStmt, we also need to create the
|
||||
// clone groups from the sub-sequences inside the children.
|
||||
if (CS)
|
||||
handleSubSequences(CS, ChildSignatures);
|
||||
|
||||
// Save the signature for the current statement in the CloneDetector object.
|
||||
CD.add(StmtSequence(S, Context), Signature);
|
||||
|
||||
return Signature;
|
||||
}
|
||||
|
||||
/// \brief Adds all possible sub-sequences in the child array of the given
|
||||
/// CompoundStmt to the CloneDetector.
|
||||
/// \param CS The given CompoundStmt.
|
||||
/// \param ChildSignatures A list of calculated signatures for each child in
|
||||
/// the given CompoundStmt.
|
||||
void handleSubSequences(
|
||||
const CompoundStmt *CS,
|
||||
const std::vector<CloneDetector::CloneSignature> &ChildSignatures) {
|
||||
|
||||
// FIXME: This function has quadratic runtime right now. Check if skipping
|
||||
// this function for too long CompoundStmts is an option.
|
||||
|
||||
// The length of the sub-sequence. We don't need to handle sequences with
|
||||
// the length 1 as they are already handled in CollectData().
|
||||
for (unsigned Length = 2; Length <= CS->size(); ++Length) {
|
||||
// The start index in the body of the CompoundStmt. We increase the
|
||||
// position until the end of the sub-sequence reaches the end of the
|
||||
// CompoundStmt body.
|
||||
for (unsigned Pos = 0; Pos <= CS->size() - Length; ++Pos) {
|
||||
// Create an empty signature and add the signatures of all selected
|
||||
// child statements to it.
|
||||
CloneDetector::CloneSignature SubSignature;
|
||||
|
||||
for (unsigned i = Pos; i < Pos + Length; ++i) {
|
||||
SubSignature.add(ChildSignatures[i]);
|
||||
}
|
||||
|
||||
// Save the signature together with the information about what children
|
||||
// sequence we selected.
|
||||
CD.add(StmtSequence(CS, Context, Pos, Pos + Length), SubSignature);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public:
|
||||
explicit CloneSignatureGenerator(CloneDetector &CD, ASTContext &Context)
|
||||
: CD(CD), Context(Context) {}
|
||||
|
||||
/// \brief Generates signatures for all statements in the given function body.
|
||||
void consumeCodeBody(const Stmt *S) { generateSignatures(S); }
|
||||
};
|
||||
} // end anonymous namespace
|
||||
|
||||
void CloneDetector::analyzeCodeBody(const Decl *D) {
|
||||
assert(D);
|
||||
assert(D->hasBody());
|
||||
CloneSignatureGenerator Generator(*this, D->getASTContext());
|
||||
Generator.consumeCodeBody(D->getBody());
|
||||
}
|
||||
|
||||
void CloneDetector::add(const StmtSequence &S,
|
||||
const CloneSignature &Signature) {
|
||||
// StringMap only works with StringRefs, so we create one for our data vector.
|
||||
auto &Data = Signature.Data;
|
||||
StringRef DataRef = StringRef(reinterpret_cast<const char *>(Data.data()),
|
||||
Data.size() * sizeof(unsigned));
|
||||
|
||||
// Search with the help of the signature if we already have encountered a
|
||||
// clone of the given StmtSequence.
|
||||
auto I = CloneGroupIndexes.find(DataRef);
|
||||
if (I == CloneGroupIndexes.end()) {
|
||||
// We haven't found an existing clone group, so we create a new clone group
|
||||
// for this StmtSequence and store the index of it in our search map.
|
||||
CloneGroupIndexes[DataRef] = CloneGroups.size();
|
||||
CloneGroups.emplace_back(S, Signature.Complexity);
|
||||
return;
|
||||
}
|
||||
|
||||
// We have found an existing clone group and can expand it with the given
|
||||
// StmtSequence.
|
||||
CloneGroups[I->getValue()].Sequences.push_back(S);
|
||||
}
|
||||
|
||||
namespace {
|
||||
/// \brief Returns true if and only if \p Stmt contains at least one other
|
||||
/// sequence in the \p Group.
|
||||
bool containsAnyInGroup(StmtSequence &Stmt,
|
||||
CloneDetector::CloneGroup &Group) {
|
||||
for (StmtSequence &GroupStmt : Group.Sequences) {
|
||||
if (Stmt.contains(GroupStmt))
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/// \brief Returns true if and only if all sequences in \p OtherGroup are
|
||||
/// contained by a sequence in \p Group.
|
||||
bool containsGroup(CloneDetector::CloneGroup &Group,
|
||||
CloneDetector::CloneGroup &OtherGroup) {
|
||||
// We have less sequences in the current group than we have in the other,
|
||||
// so we will never fulfill the requirement for returning true. This is only
|
||||
// possible because we know that a sequence in Group can contain at most
|
||||
// one sequence in OtherGroup.
|
||||
if (Group.Sequences.size() < OtherGroup.Sequences.size())
|
||||
return false;
|
||||
|
||||
for (StmtSequence &Stmt : Group.Sequences) {
|
||||
if (!containsAnyInGroup(Stmt, OtherGroup))
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
} // end anonymous namespace
|
||||
|
||||
void CloneDetector::findClones(std::vector<CloneGroup> &Result,
|
||||
unsigned MinGroupComplexity) {
|
||||
// Add every valid clone group that fulfills the complexity requirement.
|
||||
for (const CloneGroup &Group : CloneGroups) {
|
||||
if (Group.isValid() && Group.Complexity >= MinGroupComplexity) {
|
||||
Result.push_back(Group);
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<unsigned> IndexesToRemove;
|
||||
|
||||
// Compare every group in the result with the rest. If one groups contains
|
||||
// another group, we only need to return the bigger group.
|
||||
// Note: This doesn't scale well, so if possible avoid calling any heavy
|
||||
// function from this loop to minimize the performance impact.
|
||||
for (unsigned i = 0; i < Result.size(); ++i) {
|
||||
for (unsigned j = 0; j < Result.size(); ++j) {
|
||||
// Don't compare a group with itself.
|
||||
if (i == j)
|
||||
continue;
|
||||
|
||||
if (containsGroup(Result[j], Result[i])) {
|
||||
IndexesToRemove.push_back(i);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Erasing a list of indexes from the vector should be done with decreasing
|
||||
// indexes. As IndexesToRemove is constructed with increasing values, we just
|
||||
// reverse iterate over it to get the desired order.
|
||||
for (auto I = IndexesToRemove.rbegin(); I != IndexesToRemove.rend(); ++I) {
|
||||
Result.erase(Result.begin() + *I);
|
||||
}
|
||||
}
|
|
@ -22,6 +22,7 @@ add_clang_library(clangStaticAnalyzerCheckers
|
|||
CheckerDocumentation.cpp
|
||||
ChrootChecker.cpp
|
||||
ClangCheckers.cpp
|
||||
CloneChecker.cpp
|
||||
CXXSelfAssignmentChecker.cpp
|
||||
DeadStoresChecker.cpp
|
||||
DebugCheckers.cpp
|
||||
|
|
|
@ -0,0 +1,96 @@
|
|||
//===--- CloneChecker.cpp - Clone detection checker -------------*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
///
|
||||
/// \file
|
||||
/// CloneChecker is a checker that reports clones in the current translation
|
||||
/// unit.
|
||||
///
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "ClangSACheckers.h"
|
||||
#include "clang/Analysis/CloneDetection.h"
|
||||
#include "clang/Basic/Diagnostic.h"
|
||||
#include "clang/StaticAnalyzer/Core/Checker.h"
|
||||
#include "clang/StaticAnalyzer/Core/CheckerManager.h"
|
||||
#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
|
||||
|
||||
using namespace clang;
|
||||
using namespace ento;
|
||||
|
||||
namespace {
|
||||
class CloneChecker
|
||||
: public Checker<check::ASTCodeBody, check::EndOfTranslationUnit> {
|
||||
mutable CloneDetector CloneDetector;
|
||||
|
||||
public:
|
||||
void checkASTCodeBody(const Decl *D, AnalysisManager &Mgr,
|
||||
BugReporter &BR) const;
|
||||
|
||||
void checkEndOfTranslationUnit(const TranslationUnitDecl *TU,
|
||||
AnalysisManager &Mgr, BugReporter &BR) const;
|
||||
};
|
||||
} // end anonymous namespace
|
||||
|
||||
void CloneChecker::checkASTCodeBody(const Decl *D, AnalysisManager &Mgr,
|
||||
BugReporter &BR) const {
|
||||
// Every statement that should be included in the search for clones needs to
|
||||
// be passed to the CloneDetector.
|
||||
CloneDetector.analyzeCodeBody(D);
|
||||
}
|
||||
|
||||
void CloneChecker::checkEndOfTranslationUnit(const TranslationUnitDecl *TU,
|
||||
AnalysisManager &Mgr,
|
||||
BugReporter &BR) const {
|
||||
// At this point, every statement in the translation unit has been analyzed by
|
||||
// the CloneDetector. The only thing left to do is to report the found clones.
|
||||
|
||||
int MinComplexity = Mgr.getAnalyzerOptions().getOptionAsInteger(
|
||||
"MinimumCloneComplexity", 10, this);
|
||||
|
||||
assert(MinComplexity >= 0);
|
||||
|
||||
SourceManager &SM = BR.getSourceManager();
|
||||
|
||||
std::vector<CloneDetector::CloneGroup> CloneGroups;
|
||||
CloneDetector.findClones(CloneGroups, MinComplexity);
|
||||
|
||||
DiagnosticsEngine &DiagEngine = Mgr.getDiagnostic();
|
||||
|
||||
unsigned WarnID = DiagEngine.getCustomDiagID(DiagnosticsEngine::Warning,
|
||||
"Detected code clone.");
|
||||
|
||||
unsigned NoteID = DiagEngine.getCustomDiagID(DiagnosticsEngine::Note,
|
||||
"Related code clone is here.");
|
||||
|
||||
for (CloneDetector::CloneGroup &Group : CloneGroups) {
|
||||
// For readability reasons we sort the clones by line numbers.
|
||||
std::sort(Group.Sequences.begin(), Group.Sequences.end(),
|
||||
[&SM](const StmtSequence &LHS, const StmtSequence &RHS) {
|
||||
return SM.isBeforeInTranslationUnit(LHS.getStartLoc(),
|
||||
RHS.getStartLoc()) &&
|
||||
SM.isBeforeInTranslationUnit(LHS.getEndLoc(),
|
||||
RHS.getEndLoc());
|
||||
});
|
||||
|
||||
// We group the clones by printing the first as a warning and all others
|
||||
// as a note.
|
||||
DiagEngine.Report(Group.Sequences.front().getStartLoc(), WarnID);
|
||||
for (unsigned i = 1; i < Group.Sequences.size(); ++i) {
|
||||
DiagEngine.Report(Group.Sequences[i].getStartLoc(), NoteID);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Register CloneChecker
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
void ento::registerCloneChecker(CheckerManager &Mgr) {
|
||||
Mgr.registerChecker<CloneChecker>();
|
||||
}
|
|
@ -0,0 +1,19 @@
|
|||
// RUN: %clang_cc1 -analyze -fblocks -std=c++11 -analyzer-checker=alpha.clone.CloneChecker -verify %s
|
||||
|
||||
// This tests if we search for clones in blocks.
|
||||
|
||||
void log();
|
||||
|
||||
auto BlockA = ^(int a, int b){ // expected-warning{{Detected code clone.}}
|
||||
log();
|
||||
if (a > b)
|
||||
return a;
|
||||
return b;
|
||||
};
|
||||
|
||||
auto BlockB = ^(int a, int b){ // expected-note{{Related code clone is here.}}
|
||||
log();
|
||||
if (a > b)
|
||||
return a;
|
||||
return b;
|
||||
};
|
|
@ -0,0 +1,29 @@
|
|||
// RUN: %clang_cc1 -analyze -std=c++11 -analyzer-checker=alpha.clone.CloneChecker -verify %s
|
||||
|
||||
// This test contains false-positive reports from the CloneChecker that need to
|
||||
// be fixed.
|
||||
|
||||
void log();
|
||||
|
||||
int max(int a, int b) { // expected-warning{{Detected code clone.}}
|
||||
log();
|
||||
if (a > b)
|
||||
return a;
|
||||
return b;
|
||||
}
|
||||
|
||||
// FIXME: Detect different binary operator kinds.
|
||||
int min1(int a, int b) { // expected-note{{Related code clone is here.}}
|
||||
log();
|
||||
if (a < b)
|
||||
return a;
|
||||
return b;
|
||||
}
|
||||
|
||||
// FIXME: Detect different variable patterns.
|
||||
int min2(int a, int b) { // expected-note{{Related code clone is here.}}
|
||||
log();
|
||||
if (b > a)
|
||||
return a;
|
||||
return b;
|
||||
}
|
|
@ -0,0 +1,25 @@
|
|||
// RUN: %clang_cc1 -analyze -std=c++11 -analyzer-checker=alpha.clone.CloneChecker -verify %s
|
||||
|
||||
// This tests if we search for clones in functions.
|
||||
|
||||
void log();
|
||||
|
||||
int max(int a, int b) { // expected-warning{{Detected code clone.}}
|
||||
log();
|
||||
if (a > b)
|
||||
return a;
|
||||
return b;
|
||||
}
|
||||
|
||||
int maxClone(int x, int y) { // expected-note{{Related code clone is here.}}
|
||||
log();
|
||||
if (x > y)
|
||||
return x;
|
||||
return y;
|
||||
}
|
||||
|
||||
// Functions below are not clones and should not be reported.
|
||||
|
||||
int foo(int a, int b) { // no-warning
|
||||
return a + b;
|
||||
}
|
|
@ -0,0 +1,27 @@
|
|||
// RUN: %clang_cc1 -analyze -Wno-objc-root-class -analyzer-checker=alpha.clone.CloneChecker -verify %s
|
||||
|
||||
// This tests if we search for clones in Objective-C methods.
|
||||
|
||||
@interface A
|
||||
- (int) setOk : (int) a : (int) b;
|
||||
@end
|
||||
|
||||
@implementation A
|
||||
- (int) setOk : (int) a : (int) b { // expected-warning{{Detected code clone.}}
|
||||
if (a > b)
|
||||
return a;
|
||||
return b;
|
||||
}
|
||||
@end
|
||||
|
||||
@interface B
|
||||
- (int) setOk : (int) a : (int) b;
|
||||
@end
|
||||
|
||||
@implementation B
|
||||
- (int) setOk : (int) a : (int) b { // expected-note{{Related code clone is here.}}
|
||||
if (a > b)
|
||||
return a;
|
||||
return b;
|
||||
}
|
||||
@end
|
|
@ -0,0 +1,27 @@
|
|||
// RUN: %clang_cc1 -analyze -std=c++11 -analyzer-checker=alpha.clone.CloneChecker -verify %s
|
||||
|
||||
// This tests if sub-sequences can match with normal sequences.
|
||||
|
||||
void log2(int a);
|
||||
void log();
|
||||
|
||||
int max(int a, int b) {
|
||||
log2(a);
|
||||
log(); // expected-warning{{Detected code clone.}}
|
||||
if (a > b)
|
||||
return a;
|
||||
return b;
|
||||
}
|
||||
|
||||
int maxClone(int a, int b) {
|
||||
log(); // expected-note{{Related code clone is here.}}
|
||||
if (a > b)
|
||||
return a;
|
||||
return b;
|
||||
}
|
||||
|
||||
// Functions below are not clones and should not be reported.
|
||||
|
||||
int foo(int a, int b) { // no-warning
|
||||
return a + b;
|
||||
}
|
Loading…
Reference in New Issue