From 78692ea590046c7c9a66e58a14165ff6b16b5ae5 Mon Sep 17 00:00:00 2001 From: Artem Dergachev Date: Tue, 2 Aug 2016 12:21:09 +0000 Subject: [PATCH] [analyzer] Respect statement-specific data in CloneDetection. So far the CloneDetector only respected the kind of each statement when searching for clones. This patch refines the way the CloneDetector collects data from each statement by providing methods for each statement kind, that will read the kind-specific attributes. For example, statements 'a < b' and 'a > b' are no longer considered to be clones, because they are different in operation code, which is an attribute specific to the BinaryOperator statement kind. Patch by Raphael Isemann! Differential Revision: https://reviews.llvm.org/D22514 llvm-svn: 277449 --- clang/include/clang/Analysis/CloneDetection.h | 4 +- clang/lib/Analysis/CloneDetection.cpp | 168 +++++++++++++++++- clang/test/Analysis/copypaste/asm.cpp | 44 +++++ clang/test/Analysis/copypaste/attributes.cpp | 28 +++ clang/test/Analysis/copypaste/call.cpp | 24 +++ clang/test/Analysis/copypaste/catch.cpp | 29 +++ clang/test/Analysis/copypaste/delete.cpp | 29 +++ .../Analysis/copypaste/dependent-exist.cpp | 18 ++ clang/test/Analysis/copypaste/expr-types.cpp | 17 ++ .../Analysis/copypaste/false-positives.cpp | 8 - clang/test/Analysis/copypaste/fold.cpp | 35 ++++ .../Analysis/copypaste/function-try-block.cpp | 9 + clang/test/Analysis/copypaste/functions.cpp | 25 +++ clang/test/Analysis/copypaste/generic.c | 31 ++++ clang/test/Analysis/copypaste/labels.cpp | 51 ++++++ clang/test/Analysis/copypaste/lambda.cpp | 24 +++ 16 files changed, 532 insertions(+), 12 deletions(-) create mode 100644 clang/test/Analysis/copypaste/asm.cpp create mode 100644 clang/test/Analysis/copypaste/attributes.cpp create mode 100644 clang/test/Analysis/copypaste/call.cpp create mode 100644 clang/test/Analysis/copypaste/catch.cpp create mode 100644 clang/test/Analysis/copypaste/delete.cpp create mode 100644 clang/test/Analysis/copypaste/dependent-exist.cpp create mode 100644 clang/test/Analysis/copypaste/expr-types.cpp create mode 100644 clang/test/Analysis/copypaste/fold.cpp create mode 100644 clang/test/Analysis/copypaste/function-try-block.cpp create mode 100644 clang/test/Analysis/copypaste/generic.c create mode 100644 clang/test/Analysis/copypaste/labels.cpp create mode 100644 clang/test/Analysis/copypaste/lambda.cpp diff --git a/clang/include/clang/Analysis/CloneDetection.h b/clang/include/clang/Analysis/CloneDetection.h index 61b122e5a810..9bb4022ca406 100644 --- a/clang/include/clang/Analysis/CloneDetection.h +++ b/clang/include/clang/Analysis/CloneDetection.h @@ -157,6 +157,8 @@ public: /// are not supported. class CloneDetector { public: + typedef unsigned DataPiece; + /// Holds the data about a StmtSequence that is needed during the search for /// code clones. struct CloneSignature { @@ -164,7 +166,7 @@ public: /// /// If this variable is equal for two different StmtSequences, then they can /// be considered clones of each other. - std::vector Data; + std::vector Data; /// \brief The complexity of the StmtSequence. /// diff --git a/clang/lib/Analysis/CloneDetection.cpp b/clang/lib/Analysis/CloneDetection.cpp index 0d3926ebd36d..1babbb3c15e6 100644 --- a/clang/lib/Analysis/CloneDetection.cpp +++ b/clang/lib/Analysis/CloneDetection.cpp @@ -16,6 +16,7 @@ #include "clang/AST/ASTContext.h" #include "clang/AST/RecursiveASTVisitor.h" #include "clang/AST/Stmt.h" +#include "clang/AST/StmtVisitor.h" #include "llvm/ADT/StringRef.h" using namespace clang; @@ -78,6 +79,168 @@ SourceLocation StmtSequence::getStartLoc() const { SourceLocation StmtSequence::getEndLoc() const { return back()->getLocEnd(); } +namespace { +/// \brief Collects the data of a single Stmt. +/// +/// This class defines what a code clone is: If it collects for two statements +/// the same data, then those two statements are considered to be clones of each +/// other. +class StmtDataCollector : public ConstStmtVisitor { + + ASTContext &Context; + std::vector &CollectedData; + +public: + /// \brief Collects data of the given Stmt. + /// \param S The given statement. + /// \param Context The ASTContext of S. + /// \param D The given data vector to which all collected data is appended. + StmtDataCollector(const Stmt *S, ASTContext &Context, + std::vector &D) + : Context(Context), CollectedData(D) { + Visit(S); + } + + // Below are utility methods for appending different data to the vector. + + void addData(CloneDetector::DataPiece Integer) { + CollectedData.push_back(Integer); + } + + // FIXME: The functions below add long strings to the data vector which are + // probably not good for performance. Replace the strings with pointer values + // or a some other unique integer. + + void addData(llvm::StringRef Str) { + if (Str.empty()) + return; + + const size_t OldSize = CollectedData.size(); + + const size_t PieceSize = sizeof(CloneDetector::DataPiece); + // Calculate how many vector units we need to accomodate all string bytes. + size_t RoundedUpPieceNumber = (Str.size() + PieceSize - 1) / PieceSize; + // Allocate space for the string in the data vector. + CollectedData.resize(CollectedData.size() + RoundedUpPieceNumber); + + // Copy the string to the allocated space at the end of the vector. + std::memcpy(CollectedData.data() + OldSize, Str.data(), Str.size()); + } + + void addData(const QualType &QT) { addData(QT.getAsString()); } + +// The functions below collect the class specific data of each Stmt subclass. + +// Utility macro for defining a visit method for a given class. This method +// calls back to the ConstStmtVisitor to visit all parent classes. +#define DEF_ADD_DATA(CLASS, CODE) \ + void Visit##CLASS(const CLASS *S) { \ + CODE; \ + ConstStmtVisitor::Visit##CLASS(S); \ + } + + DEF_ADD_DATA(Stmt, { addData(S->getStmtClass()); }) + DEF_ADD_DATA(Expr, { addData(S->getType()); }) + + //--- Builtin functionality ----------------------------------------------// + DEF_ADD_DATA(ArrayTypeTraitExpr, { addData(S->getTrait()); }) + DEF_ADD_DATA(ExpressionTraitExpr, { addData(S->getTrait()); }) + DEF_ADD_DATA(PredefinedExpr, { addData(S->getIdentType()); }) + DEF_ADD_DATA(TypeTraitExpr, { + addData(S->getTrait()); + for (unsigned i = 0; i < S->getNumArgs(); ++i) + addData(S->getArg(i)->getType()); + }) + + //--- Calls --------------------------------------------------------------// + DEF_ADD_DATA(CallExpr, + { addData(S->getDirectCallee()->getQualifiedNameAsString()); }) + + //--- Exceptions ---------------------------------------------------------// + DEF_ADD_DATA(CXXCatchStmt, { addData(S->getCaughtType()); }) + + //--- C++ OOP Stmts ------------------------------------------------------// + DEF_ADD_DATA(CXXDeleteExpr, { + addData(S->isArrayFormAsWritten()); + addData(S->isGlobalDelete()); + }) + + //--- Casts --------------------------------------------------------------// + DEF_ADD_DATA(ObjCBridgedCastExpr, { addData(S->getBridgeKind()); }) + + //--- Miscellaneous Exprs ------------------------------------------------// + DEF_ADD_DATA(BinaryOperator, { addData(S->getOpcode()); }) + DEF_ADD_DATA(UnaryOperator, { addData(S->getOpcode()); }) + + //--- Control flow -------------------------------------------------------// + DEF_ADD_DATA(GotoStmt, { addData(S->getLabel()->getName()); }) + DEF_ADD_DATA(IndirectGotoStmt, { + if (S->getConstantTarget()) + addData(S->getConstantTarget()->getName()); + }) + DEF_ADD_DATA(LabelStmt, { addData(S->getDecl()->getName()); }) + DEF_ADD_DATA(MSDependentExistsStmt, { addData(S->isIfExists()); }) + DEF_ADD_DATA(AddrLabelExpr, { addData(S->getLabel()->getName()); }) + + //--- Objective-C --------------------------------------------------------// + DEF_ADD_DATA(ObjCIndirectCopyRestoreExpr, { addData(S->shouldCopy()); }) + DEF_ADD_DATA(ObjCPropertyRefExpr, { + addData(S->isSuperReceiver()); + addData(S->isImplicitProperty()); + }) + DEF_ADD_DATA(ObjCAtCatchStmt, { addData(S->hasEllipsis()); }) + + //--- Miscellaneous Stmts ------------------------------------------------// + DEF_ADD_DATA(CXXFoldExpr, { + addData(S->isRightFold()); + addData(S->getOperator()); + }) + DEF_ADD_DATA(GenericSelectionExpr, { + for (unsigned i = 0; i < S->getNumAssocs(); ++i) { + addData(S->getAssocType(i)); + } + }) + DEF_ADD_DATA(LambdaExpr, { + for (const LambdaCapture &C : S->captures()) { + addData(C.isPackExpansion()); + addData(C.getCaptureKind()); + if (C.capturesVariable()) + addData(C.getCapturedVar()->getType()); + } + addData(S->isGenericLambda()); + addData(S->isMutable()); + }) + DEF_ADD_DATA(DeclStmt, { + auto numDecls = std::distance(S->decl_begin(), S->decl_end()); + addData(static_cast(numDecls)); + for (const Decl *D : S->decls()) { + if (const VarDecl *VD = dyn_cast(D)) { + addData(VD->getType()); + } + } + }) + DEF_ADD_DATA(AsmStmt, { + addData(S->isSimple()); + addData(S->isVolatile()); + addData(S->generateAsmString(Context)); + for (unsigned i = 0; i < S->getNumInputs(); ++i) { + addData(S->getInputConstraint(i)); + } + for (unsigned i = 0; i < S->getNumOutputs(); ++i) { + addData(S->getOutputConstraint(i)); + } + for (unsigned i = 0; i < S->getNumClobbers(); ++i) { + addData(S->getClobber(i)); + } + }) + DEF_ADD_DATA(AttributedStmt, { + for (const Attr *A : S->getAttrs()) { + addData(std::string(A->getSpelling())); + } + }) +}; +} // end anonymous namespace + namespace { /// Generates CloneSignatures for a set of statements and stores the results in /// a CloneDetector object. @@ -95,9 +258,8 @@ class CloneSignatureGenerator { // Create an empty signature that will be filled in this method. CloneDetector::CloneSignature Signature; - // The only relevant data for now is the class of the statement. - // TODO: Collect statement class specific data. - Signature.Data.push_back(S->getStmtClass()); + // Collect all relevant data from S and put it into the empty signature. + StmtDataCollector(S, Context, Signature.Data); // Storage for the signatures of the direct child statements. This is only // needed if the current statement is a CompoundStmt. diff --git a/clang/test/Analysis/copypaste/asm.cpp b/clang/test/Analysis/copypaste/asm.cpp new file mode 100644 index 000000000000..61f0def594d8 --- /dev/null +++ b/clang/test/Analysis/copypaste/asm.cpp @@ -0,0 +1,44 @@ +// RUN: %clang_cc1 -analyze -std=c++11 -analyzer-checker=alpha.clone.CloneChecker -verify %s + +// expected-no-diagnostics + +int foo1(int src) { + int dst = src; + if (src < 100 && src > 0) { + + asm ("mov %1, %0\n\t" + "add $1, %0" + : "=r" (dst) + : "r" (src)); + + } + return dst; +} + +// Identical to foo1 except that it adds two instead of one, so it's no clone. +int foo2(int src) { + int dst = src; + if (src < 100 && src > 0) { + + asm ("mov %1, %0\n\t" + "add $2, %0" + : "=r" (dst) + : "r" (src)); + + } + return dst; +} + +// Identical to foo1 except that its a volatile asm statement, so it's no clone. +int foo3(int src) { + int dst = src; + if (src < 100 && src > 0) { + + asm volatile ("mov %1, %0\n\t" + "add $1, %0" + : "=r" (dst) + : "r" (src)); + + } + return dst; +} diff --git a/clang/test/Analysis/copypaste/attributes.cpp b/clang/test/Analysis/copypaste/attributes.cpp new file mode 100644 index 000000000000..72d654c6e060 --- /dev/null +++ b/clang/test/Analysis/copypaste/attributes.cpp @@ -0,0 +1,28 @@ +// RUN: %clang_cc1 -analyze -std=c++1z -analyzer-checker=alpha.clone.CloneChecker -verify %s + +// expected-no-diagnostics + +int foo1(int n) { + int result = 0; + switch (n) { + case 33: + result += 33; + [[clang::fallthrough]]; + case 44: + result += 44; + } + return result; +} + +// Identical to foo1 except the missing attribute. +int foo2(int n) { + int result = 0; + switch (n) { + case 33: + result += 33; + ; + case 44: + result += 44; + } + return result; +} diff --git a/clang/test/Analysis/copypaste/call.cpp b/clang/test/Analysis/copypaste/call.cpp new file mode 100644 index 000000000000..06aa633fe963 --- /dev/null +++ b/clang/test/Analysis/copypaste/call.cpp @@ -0,0 +1,24 @@ +// RUN: %clang_cc1 -analyze -std=c++1z -analyzer-checker=alpha.clone.CloneChecker -verify %s + +// expected-no-diagnostics + +bool a(); +bool b(); + +// Calls method a with some extra code to pass the minimum complexity +bool foo1(int x) { + if (x > 0) + return false; + else if (x < 0) + return a(); + return true; +} + +// Calls method b with some extra code to pass the minimum complexity +bool foo2(int x) { + if (x > 0) + return false; + else if (x < 0) + return b(); + return true; +} diff --git a/clang/test/Analysis/copypaste/catch.cpp b/clang/test/Analysis/copypaste/catch.cpp new file mode 100644 index 000000000000..590ce8f223f9 --- /dev/null +++ b/clang/test/Analysis/copypaste/catch.cpp @@ -0,0 +1,29 @@ +// RUN: %clang_cc1 -analyze -fcxx-exceptions -std=c++1z -analyzer-checker=alpha.clone.CloneChecker -verify %s + +// expected-no-diagnostics + +bool foo1(int x) { + if (x > 0) + return false; + else if (x < 0) + try { x--; } catch (int i) {} + return true; +} + +// Uses parenthesis instead of type +bool foo2(int x) { + if (x > 0) + return false; + else if (x < 0) + try { x--; } catch (...) {} + return true; +} + +// Catches a different type (long instead of int) +bool foo3(int x) { + if (x > 0) + return false; + else if (x < 0) + try { x--; } catch (long i) {} + return true; +} diff --git a/clang/test/Analysis/copypaste/delete.cpp b/clang/test/Analysis/copypaste/delete.cpp new file mode 100644 index 000000000000..dc42c9c0595b --- /dev/null +++ b/clang/test/Analysis/copypaste/delete.cpp @@ -0,0 +1,29 @@ +// RUN: %clang_cc1 -analyze -std=c++1z -analyzer-checker=alpha.clone.CloneChecker -verify %s + +// expected-no-diagnostics + +bool foo1(int x, int* a) { + if (x > 0) + return false; + else if (x < 0) + delete a; + return true; +} + +// Explicit global delete +bool foo2(int x, int* a) { + if (x > 0) + return false; + else if (x < 0) + ::delete a; + return true; +} + +// Array delete +bool foo3(int x, int* a) { + if (x > 0) + return false; + else if (x < 0) + delete[] a; + return true; +} diff --git a/clang/test/Analysis/copypaste/dependent-exist.cpp b/clang/test/Analysis/copypaste/dependent-exist.cpp new file mode 100644 index 000000000000..5182ba61c9dc --- /dev/null +++ b/clang/test/Analysis/copypaste/dependent-exist.cpp @@ -0,0 +1,18 @@ +// RUN: %clang_cc1 -analyze -fms-extensions -std=c++1z -analyzer-checker=alpha.clone.CloneChecker -verify %s + +// expected-no-diagnostics + +bool foo1(int x) { + if (x < 0) { + __if_exists(x) { return false; } + } + return true; +} + +// Same as above, but __if_not_exists +bool foo2(int x) { + if (x < 0) { + __if_not_exists(x) { return false; } + } + return true; +} diff --git a/clang/test/Analysis/copypaste/expr-types.cpp b/clang/test/Analysis/copypaste/expr-types.cpp new file mode 100644 index 000000000000..14eef6eac636 --- /dev/null +++ b/clang/test/Analysis/copypaste/expr-types.cpp @@ -0,0 +1,17 @@ +// RUN: %clang_cc1 -analyze -std=c++11 -analyzer-checker=alpha.clone.CloneChecker -verify %s + +// expected-no-diagnostics + + +int foo1(int a, int b) { + if (a > b) + return a; + return b; +} + +// Different types, so not a clone +int foo2(long a, long b) { + if (a > b) + return a; + return b; +} diff --git a/clang/test/Analysis/copypaste/false-positives.cpp b/clang/test/Analysis/copypaste/false-positives.cpp index 8d649c790b85..4c6275949dab 100644 --- a/clang/test/Analysis/copypaste/false-positives.cpp +++ b/clang/test/Analysis/copypaste/false-positives.cpp @@ -12,14 +12,6 @@ int max(int a, int b) { // expected-warning{{Detected code clone.}} return b; } -// FIXME: Detect different binary operator kinds. -int min1(int a, int b) { // expected-note{{Related code clone is here.}} - log(); - if (a < b) - return a; - return b; -} - // FIXME: Detect different variable patterns. int min2(int a, int b) { // expected-note{{Related code clone is here.}} log(); diff --git a/clang/test/Analysis/copypaste/fold.cpp b/clang/test/Analysis/copypaste/fold.cpp new file mode 100644 index 000000000000..548dfb19af56 --- /dev/null +++ b/clang/test/Analysis/copypaste/fold.cpp @@ -0,0 +1,35 @@ +// RUN: %clang_cc1 -analyze -std=c++1z -analyzer-checker=alpha.clone.CloneChecker -verify %s + +// expected-no-diagnostics + +int global = 0; + +template +int foo1(Args&&... args) { + if (global > 0) + return 0; + else if (global < 0) + return (args + ...); + return 1; +} + +// Different opeator in fold expression. +template +int foo2(Args&&... args) { + if (global > 0) + return 0; + else if (global < 0) + return (args - ...); + return 1; +} + +// Parameter pack on a different side +template +int foo3(Args&&... args) { + if (global > 0) + return 0; + else if (global < 0) + return -1; + return (... + args); +return 1; +} diff --git a/clang/test/Analysis/copypaste/function-try-block.cpp b/clang/test/Analysis/copypaste/function-try-block.cpp new file mode 100644 index 000000000000..b13096d949a9 --- /dev/null +++ b/clang/test/Analysis/copypaste/function-try-block.cpp @@ -0,0 +1,9 @@ +// RUN: %clang_cc1 -analyze -fcxx-exceptions -std=c++1z -analyzer-checker=alpha.clone.CloneChecker -verify %s + +// Tests if function try blocks are correctly handled. + +void nonCompoundStmt1(int& x) + try { x += 1; } catch(...) { x -= 1; } // expected-warning{{Detected code clone.}} + +void nonCompoundStmt2(int& x) + try { x += 1; } catch(...) { x -= 1; } // expected-note{{Related code clone is here.}} diff --git a/clang/test/Analysis/copypaste/functions.cpp b/clang/test/Analysis/copypaste/functions.cpp index 29f389a5eac2..bedd374b79d6 100644 --- a/clang/test/Analysis/copypaste/functions.cpp +++ b/clang/test/Analysis/copypaste/functions.cpp @@ -20,6 +20,31 @@ int maxClone(int x, int y) { // expected-note{{Related code clone is here.}} // Functions below are not clones and should not be reported. +// The next two functions test that statement classes are still respected when +// checking for clones in expressions. This will show that the statement +// specific data of all base classes is collected, and not just the data of the +// first base class. +int testBaseClass(int a, int b) { // no-warning + log(); + if (a > b) + return true ? a : b; + return b; +} +int testBaseClass2(int a, int b) { // no-warning + log(); + if (a > b) + return __builtin_choose_expr(true, a, b); + return b; +} + + +int min1(int a, int b) { // no-warning + log(); + if (a < b) + return a; + return b; +} + int foo(int a, int b) { // no-warning return a + b; } diff --git a/clang/test/Analysis/copypaste/generic.c b/clang/test/Analysis/copypaste/generic.c new file mode 100644 index 000000000000..9d8392139b39 --- /dev/null +++ b/clang/test/Analysis/copypaste/generic.c @@ -0,0 +1,31 @@ +// RUN: %clang_cc1 -analyze -std=c11 -analyzer-checker=alpha.clone.CloneChecker -verify %s + +// expected-no-diagnostics + +int global; + +int foo1() { + if (global > 0) + return 0; + else if (global < 0) + return _Generic(global, double: 1, float: 2, default: 3); + return 1; +} + +// Different associated type (int instead of float) +int foo2() { + if (global > 0) + return 0; + else if (global < 0) + return _Generic(global, double: 1, int: 2, default: 4); + return 1; +} + +// Different number of associated types. +int foo3() { + if (global > 0) + return 0; + else if (global < 0) + return _Generic(global, double: 1, default: 4); + return 1; +} diff --git a/clang/test/Analysis/copypaste/labels.cpp b/clang/test/Analysis/copypaste/labels.cpp new file mode 100644 index 000000000000..26318ac40510 --- /dev/null +++ b/clang/test/Analysis/copypaste/labels.cpp @@ -0,0 +1,51 @@ +// RUN: %clang_cc1 -analyze -std=gnu++11 -analyzer-checker=alpha.clone.CloneChecker -verify %s + +// expected-no-diagnostics + + +bool foo1(int x) { + start: + if (x != 3) { + ++x; + void *ptr = &&start; + goto start; + } + end: + return false; +} + +// Targeting a different label with the address-of-label operator. +bool foo2(int x) { + start: + if (x != 3) { + ++x; + void *ptr = &&end; + goto start; + } + end: + return false; +} + +// Different target label in goto +bool foo3(int x) { + start: + if (x != 3) { + ++x; + void *ptr = &&start; + goto end; + } + end: + return false; +} + +// FIXME: Can't detect same algorithm as in foo1 but with different label names. +bool foo4(int x) { + foo: + if (x != 3) { + ++x; + void *ptr = &&foo; + goto foo; + } + end: + return false; +} diff --git a/clang/test/Analysis/copypaste/lambda.cpp b/clang/test/Analysis/copypaste/lambda.cpp new file mode 100644 index 000000000000..c13c56f6671d --- /dev/null +++ b/clang/test/Analysis/copypaste/lambda.cpp @@ -0,0 +1,24 @@ +// RUN: %clang_cc1 -analyze -std=c++11 -analyzer-checker=alpha.clone.CloneChecker -verify %s + +// expected-no-diagnostics + +void foo1(int a, long b) { + auto l = [a, b](){}; +} + +void foo2(int a, long b) { + auto l = [&a, b](){}; +} + +void foo3(int a, long b) { + auto l = [a](){}; +} + +void foo4(int a, long b) { + auto l = [=](){}; +} + +void foo5(int a, long b) { + auto l = [&](){}; +} +