[analyzer] Respect statement-specific data in CloneDetection.

So far the CloneDetector only respected the kind of each statement when
searching for clones. This patch refines the way the CloneDetector collects data
from each statement by providing methods for each statement kind,
that will read the kind-specific attributes.

For example, statements 'a < b' and 'a > b' are no longer considered to be
clones, because they are different in operation code, which is an attribute
specific to the BinaryOperator statement kind.

Patch by Raphael Isemann!

Differential Revision: https://reviews.llvm.org/D22514

llvm-svn: 277449
This commit is contained in:
Artem Dergachev 2016-08-02 12:21:09 +00:00
parent 3f704497fa
commit 78692ea590
16 changed files with 532 additions and 12 deletions

View File

@ -157,6 +157,8 @@ public:
/// are not supported.
class CloneDetector {
public:
typedef unsigned DataPiece;
/// Holds the data about a StmtSequence that is needed during the search for
/// code clones.
struct CloneSignature {
@ -164,7 +166,7 @@ public:
///
/// If this variable is equal for two different StmtSequences, then they can
/// be considered clones of each other.
std::vector<unsigned> Data;
std::vector<DataPiece> Data;
/// \brief The complexity of the StmtSequence.
///

View File

@ -16,6 +16,7 @@
#include "clang/AST/ASTContext.h"
#include "clang/AST/RecursiveASTVisitor.h"
#include "clang/AST/Stmt.h"
#include "clang/AST/StmtVisitor.h"
#include "llvm/ADT/StringRef.h"
using namespace clang;
@ -78,6 +79,168 @@ SourceLocation StmtSequence::getStartLoc() const {
SourceLocation StmtSequence::getEndLoc() const { return back()->getLocEnd(); }
namespace {
/// \brief Collects the data of a single Stmt.
///
/// This class defines what a code clone is: If it collects for two statements
/// the same data, then those two statements are considered to be clones of each
/// other.
class StmtDataCollector : public ConstStmtVisitor<StmtDataCollector> {
ASTContext &Context;
std::vector<CloneDetector::DataPiece> &CollectedData;
public:
/// \brief Collects data of the given Stmt.
/// \param S The given statement.
/// \param Context The ASTContext of S.
/// \param D The given data vector to which all collected data is appended.
StmtDataCollector(const Stmt *S, ASTContext &Context,
std::vector<CloneDetector::DataPiece> &D)
: Context(Context), CollectedData(D) {
Visit(S);
}
// Below are utility methods for appending different data to the vector.
void addData(CloneDetector::DataPiece Integer) {
CollectedData.push_back(Integer);
}
// FIXME: The functions below add long strings to the data vector which are
// probably not good for performance. Replace the strings with pointer values
// or a some other unique integer.
void addData(llvm::StringRef Str) {
if (Str.empty())
return;
const size_t OldSize = CollectedData.size();
const size_t PieceSize = sizeof(CloneDetector::DataPiece);
// Calculate how many vector units we need to accomodate all string bytes.
size_t RoundedUpPieceNumber = (Str.size() + PieceSize - 1) / PieceSize;
// Allocate space for the string in the data vector.
CollectedData.resize(CollectedData.size() + RoundedUpPieceNumber);
// Copy the string to the allocated space at the end of the vector.
std::memcpy(CollectedData.data() + OldSize, Str.data(), Str.size());
}
void addData(const QualType &QT) { addData(QT.getAsString()); }
// The functions below collect the class specific data of each Stmt subclass.
// Utility macro for defining a visit method for a given class. This method
// calls back to the ConstStmtVisitor to visit all parent classes.
#define DEF_ADD_DATA(CLASS, CODE) \
void Visit##CLASS(const CLASS *S) { \
CODE; \
ConstStmtVisitor<StmtDataCollector>::Visit##CLASS(S); \
}
DEF_ADD_DATA(Stmt, { addData(S->getStmtClass()); })
DEF_ADD_DATA(Expr, { addData(S->getType()); })
//--- Builtin functionality ----------------------------------------------//
DEF_ADD_DATA(ArrayTypeTraitExpr, { addData(S->getTrait()); })
DEF_ADD_DATA(ExpressionTraitExpr, { addData(S->getTrait()); })
DEF_ADD_DATA(PredefinedExpr, { addData(S->getIdentType()); })
DEF_ADD_DATA(TypeTraitExpr, {
addData(S->getTrait());
for (unsigned i = 0; i < S->getNumArgs(); ++i)
addData(S->getArg(i)->getType());
})
//--- Calls --------------------------------------------------------------//
DEF_ADD_DATA(CallExpr,
{ addData(S->getDirectCallee()->getQualifiedNameAsString()); })
//--- Exceptions ---------------------------------------------------------//
DEF_ADD_DATA(CXXCatchStmt, { addData(S->getCaughtType()); })
//--- C++ OOP Stmts ------------------------------------------------------//
DEF_ADD_DATA(CXXDeleteExpr, {
addData(S->isArrayFormAsWritten());
addData(S->isGlobalDelete());
})
//--- Casts --------------------------------------------------------------//
DEF_ADD_DATA(ObjCBridgedCastExpr, { addData(S->getBridgeKind()); })
//--- Miscellaneous Exprs ------------------------------------------------//
DEF_ADD_DATA(BinaryOperator, { addData(S->getOpcode()); })
DEF_ADD_DATA(UnaryOperator, { addData(S->getOpcode()); })
//--- Control flow -------------------------------------------------------//
DEF_ADD_DATA(GotoStmt, { addData(S->getLabel()->getName()); })
DEF_ADD_DATA(IndirectGotoStmt, {
if (S->getConstantTarget())
addData(S->getConstantTarget()->getName());
})
DEF_ADD_DATA(LabelStmt, { addData(S->getDecl()->getName()); })
DEF_ADD_DATA(MSDependentExistsStmt, { addData(S->isIfExists()); })
DEF_ADD_DATA(AddrLabelExpr, { addData(S->getLabel()->getName()); })
//--- Objective-C --------------------------------------------------------//
DEF_ADD_DATA(ObjCIndirectCopyRestoreExpr, { addData(S->shouldCopy()); })
DEF_ADD_DATA(ObjCPropertyRefExpr, {
addData(S->isSuperReceiver());
addData(S->isImplicitProperty());
})
DEF_ADD_DATA(ObjCAtCatchStmt, { addData(S->hasEllipsis()); })
//--- Miscellaneous Stmts ------------------------------------------------//
DEF_ADD_DATA(CXXFoldExpr, {
addData(S->isRightFold());
addData(S->getOperator());
})
DEF_ADD_DATA(GenericSelectionExpr, {
for (unsigned i = 0; i < S->getNumAssocs(); ++i) {
addData(S->getAssocType(i));
}
})
DEF_ADD_DATA(LambdaExpr, {
for (const LambdaCapture &C : S->captures()) {
addData(C.isPackExpansion());
addData(C.getCaptureKind());
if (C.capturesVariable())
addData(C.getCapturedVar()->getType());
}
addData(S->isGenericLambda());
addData(S->isMutable());
})
DEF_ADD_DATA(DeclStmt, {
auto numDecls = std::distance(S->decl_begin(), S->decl_end());
addData(static_cast<CloneDetector::DataPiece>(numDecls));
for (const Decl *D : S->decls()) {
if (const VarDecl *VD = dyn_cast<VarDecl>(D)) {
addData(VD->getType());
}
}
})
DEF_ADD_DATA(AsmStmt, {
addData(S->isSimple());
addData(S->isVolatile());
addData(S->generateAsmString(Context));
for (unsigned i = 0; i < S->getNumInputs(); ++i) {
addData(S->getInputConstraint(i));
}
for (unsigned i = 0; i < S->getNumOutputs(); ++i) {
addData(S->getOutputConstraint(i));
}
for (unsigned i = 0; i < S->getNumClobbers(); ++i) {
addData(S->getClobber(i));
}
})
DEF_ADD_DATA(AttributedStmt, {
for (const Attr *A : S->getAttrs()) {
addData(std::string(A->getSpelling()));
}
})
};
} // end anonymous namespace
namespace {
/// Generates CloneSignatures for a set of statements and stores the results in
/// a CloneDetector object.
@ -95,9 +258,8 @@ class CloneSignatureGenerator {
// Create an empty signature that will be filled in this method.
CloneDetector::CloneSignature Signature;
// The only relevant data for now is the class of the statement.
// TODO: Collect statement class specific data.
Signature.Data.push_back(S->getStmtClass());
// Collect all relevant data from S and put it into the empty signature.
StmtDataCollector(S, Context, Signature.Data);
// Storage for the signatures of the direct child statements. This is only
// needed if the current statement is a CompoundStmt.

View File

@ -0,0 +1,44 @@
// RUN: %clang_cc1 -analyze -std=c++11 -analyzer-checker=alpha.clone.CloneChecker -verify %s
// expected-no-diagnostics
int foo1(int src) {
int dst = src;
if (src < 100 && src > 0) {
asm ("mov %1, %0\n\t"
"add $1, %0"
: "=r" (dst)
: "r" (src));
}
return dst;
}
// Identical to foo1 except that it adds two instead of one, so it's no clone.
int foo2(int src) {
int dst = src;
if (src < 100 && src > 0) {
asm ("mov %1, %0\n\t"
"add $2, %0"
: "=r" (dst)
: "r" (src));
}
return dst;
}
// Identical to foo1 except that its a volatile asm statement, so it's no clone.
int foo3(int src) {
int dst = src;
if (src < 100 && src > 0) {
asm volatile ("mov %1, %0\n\t"
"add $1, %0"
: "=r" (dst)
: "r" (src));
}
return dst;
}

View File

@ -0,0 +1,28 @@
// RUN: %clang_cc1 -analyze -std=c++1z -analyzer-checker=alpha.clone.CloneChecker -verify %s
// expected-no-diagnostics
int foo1(int n) {
int result = 0;
switch (n) {
case 33:
result += 33;
[[clang::fallthrough]];
case 44:
result += 44;
}
return result;
}
// Identical to foo1 except the missing attribute.
int foo2(int n) {
int result = 0;
switch (n) {
case 33:
result += 33;
;
case 44:
result += 44;
}
return result;
}

View File

@ -0,0 +1,24 @@
// RUN: %clang_cc1 -analyze -std=c++1z -analyzer-checker=alpha.clone.CloneChecker -verify %s
// expected-no-diagnostics
bool a();
bool b();
// Calls method a with some extra code to pass the minimum complexity
bool foo1(int x) {
if (x > 0)
return false;
else if (x < 0)
return a();
return true;
}
// Calls method b with some extra code to pass the minimum complexity
bool foo2(int x) {
if (x > 0)
return false;
else if (x < 0)
return b();
return true;
}

View File

@ -0,0 +1,29 @@
// RUN: %clang_cc1 -analyze -fcxx-exceptions -std=c++1z -analyzer-checker=alpha.clone.CloneChecker -verify %s
// expected-no-diagnostics
bool foo1(int x) {
if (x > 0)
return false;
else if (x < 0)
try { x--; } catch (int i) {}
return true;
}
// Uses parenthesis instead of type
bool foo2(int x) {
if (x > 0)
return false;
else if (x < 0)
try { x--; } catch (...) {}
return true;
}
// Catches a different type (long instead of int)
bool foo3(int x) {
if (x > 0)
return false;
else if (x < 0)
try { x--; } catch (long i) {}
return true;
}

View File

@ -0,0 +1,29 @@
// RUN: %clang_cc1 -analyze -std=c++1z -analyzer-checker=alpha.clone.CloneChecker -verify %s
// expected-no-diagnostics
bool foo1(int x, int* a) {
if (x > 0)
return false;
else if (x < 0)
delete a;
return true;
}
// Explicit global delete
bool foo2(int x, int* a) {
if (x > 0)
return false;
else if (x < 0)
::delete a;
return true;
}
// Array delete
bool foo3(int x, int* a) {
if (x > 0)
return false;
else if (x < 0)
delete[] a;
return true;
}

View File

@ -0,0 +1,18 @@
// RUN: %clang_cc1 -analyze -fms-extensions -std=c++1z -analyzer-checker=alpha.clone.CloneChecker -verify %s
// expected-no-diagnostics
bool foo1(int x) {
if (x < 0) {
__if_exists(x) { return false; }
}
return true;
}
// Same as above, but __if_not_exists
bool foo2(int x) {
if (x < 0) {
__if_not_exists(x) { return false; }
}
return true;
}

View File

@ -0,0 +1,17 @@
// RUN: %clang_cc1 -analyze -std=c++11 -analyzer-checker=alpha.clone.CloneChecker -verify %s
// expected-no-diagnostics
int foo1(int a, int b) {
if (a > b)
return a;
return b;
}
// Different types, so not a clone
int foo2(long a, long b) {
if (a > b)
return a;
return b;
}

View File

@ -12,14 +12,6 @@ int max(int a, int b) { // expected-warning{{Detected code clone.}}
return b;
}
// FIXME: Detect different binary operator kinds.
int min1(int a, int b) { // expected-note{{Related code clone is here.}}
log();
if (a < b)
return a;
return b;
}
// FIXME: Detect different variable patterns.
int min2(int a, int b) { // expected-note{{Related code clone is here.}}
log();

View File

@ -0,0 +1,35 @@
// RUN: %clang_cc1 -analyze -std=c++1z -analyzer-checker=alpha.clone.CloneChecker -verify %s
// expected-no-diagnostics
int global = 0;
template<typename ...Args>
int foo1(Args&&... args) {
if (global > 0)
return 0;
else if (global < 0)
return (args + ...);
return 1;
}
// Different opeator in fold expression.
template<typename ...Args>
int foo2(Args&&... args) {
if (global > 0)
return 0;
else if (global < 0)
return (args - ...);
return 1;
}
// Parameter pack on a different side
template<typename ...Args>
int foo3(Args&&... args) {
if (global > 0)
return 0;
else if (global < 0)
return -1;
return (... + args);
return 1;
}

View File

@ -0,0 +1,9 @@
// RUN: %clang_cc1 -analyze -fcxx-exceptions -std=c++1z -analyzer-checker=alpha.clone.CloneChecker -verify %s
// Tests if function try blocks are correctly handled.
void nonCompoundStmt1(int& x)
try { x += 1; } catch(...) { x -= 1; } // expected-warning{{Detected code clone.}}
void nonCompoundStmt2(int& x)
try { x += 1; } catch(...) { x -= 1; } // expected-note{{Related code clone is here.}}

View File

@ -20,6 +20,31 @@ int maxClone(int x, int y) { // expected-note{{Related code clone is here.}}
// Functions below are not clones and should not be reported.
// The next two functions test that statement classes are still respected when
// checking for clones in expressions. This will show that the statement
// specific data of all base classes is collected, and not just the data of the
// first base class.
int testBaseClass(int a, int b) { // no-warning
log();
if (a > b)
return true ? a : b;
return b;
}
int testBaseClass2(int a, int b) { // no-warning
log();
if (a > b)
return __builtin_choose_expr(true, a, b);
return b;
}
int min1(int a, int b) { // no-warning
log();
if (a < b)
return a;
return b;
}
int foo(int a, int b) { // no-warning
return a + b;
}

View File

@ -0,0 +1,31 @@
// RUN: %clang_cc1 -analyze -std=c11 -analyzer-checker=alpha.clone.CloneChecker -verify %s
// expected-no-diagnostics
int global;
int foo1() {
if (global > 0)
return 0;
else if (global < 0)
return _Generic(global, double: 1, float: 2, default: 3);
return 1;
}
// Different associated type (int instead of float)
int foo2() {
if (global > 0)
return 0;
else if (global < 0)
return _Generic(global, double: 1, int: 2, default: 4);
return 1;
}
// Different number of associated types.
int foo3() {
if (global > 0)
return 0;
else if (global < 0)
return _Generic(global, double: 1, default: 4);
return 1;
}

View File

@ -0,0 +1,51 @@
// RUN: %clang_cc1 -analyze -std=gnu++11 -analyzer-checker=alpha.clone.CloneChecker -verify %s
// expected-no-diagnostics
bool foo1(int x) {
start:
if (x != 3) {
++x;
void *ptr = &&start;
goto start;
}
end:
return false;
}
// Targeting a different label with the address-of-label operator.
bool foo2(int x) {
start:
if (x != 3) {
++x;
void *ptr = &&end;
goto start;
}
end:
return false;
}
// Different target label in goto
bool foo3(int x) {
start:
if (x != 3) {
++x;
void *ptr = &&start;
goto end;
}
end:
return false;
}
// FIXME: Can't detect same algorithm as in foo1 but with different label names.
bool foo4(int x) {
foo:
if (x != 3) {
++x;
void *ptr = &&foo;
goto foo;
}
end:
return false;
}

View File

@ -0,0 +1,24 @@
// RUN: %clang_cc1 -analyze -std=c++11 -analyzer-checker=alpha.clone.CloneChecker -verify %s
// expected-no-diagnostics
void foo1(int a, long b) {
auto l = [a, b](){};
}
void foo2(int a, long b) {
auto l = [&a, b](){};
}
void foo3(int a, long b) {
auto l = [a](){};
}
void foo4(int a, long b) {
auto l = [=](){};
}
void foo5(int a, long b) {
auto l = [&](){};
}