2012-02-01 03:33:39 +08:00
|
|
|
//== CStringSyntaxChecker.cpp - Checks for C string API misuse ---*- C++ -*-==//
|
|
|
|
//
|
2019-01-19 16:50:56 +08:00
|
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
2012-02-01 03:33:39 +08:00
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
//
|
|
|
|
// An AST checker that looks for common pitfalls when using C string APIs.
|
|
|
|
// - Identifies erroneous patterns in the last argument to strncat - the number
|
|
|
|
// of bytes to copy.
|
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
[analyzer][NFC] Move CheckerRegistry from the Core directory to Frontend
ClangCheckerRegistry is a very non-obvious, poorly documented, weird concept.
It derives from CheckerRegistry, and is placed in lib/StaticAnalyzer/Frontend,
whereas its base is located in lib/StaticAnalyzer/Core. It was, from what I can
imagine, used to circumvent the problem that the registry functions of the
checkers are located in the clangStaticAnalyzerCheckers library, but that
library depends on clangStaticAnalyzerCore. However, clangStaticAnalyzerFrontend
depends on both of those libraries.
One can make the observation however, that CheckerRegistry has no place in Core,
it isn't used there at all! The only place where it is used is Frontend, which
is where it ultimately belongs.
This move implies that since
include/clang/StaticAnalyzer/Checkers/ClangCheckers.h only contained a single function:
class CheckerRegistry;
void registerBuiltinCheckers(CheckerRegistry &registry);
it had to be repurposed, as CheckerRegistry is no longer available to
clangStaticAnalyzerCheckers. It was renamed to BuiltinCheckerRegistration.h,
which actually describes it a lot better -- it does not contain the registration
functions for checkers, but only those generated by the tblgen files.
Differential Revision: https://reviews.llvm.org/D54436
llvm-svn: 349275
2018-12-16 00:23:51 +08:00
|
|
|
#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
|
2012-02-01 03:33:39 +08:00
|
|
|
#include "clang/AST/Expr.h"
|
|
|
|
#include "clang/AST/OperationKinds.h"
|
|
|
|
#include "clang/AST/StmtVisitor.h"
|
2017-09-07 05:45:03 +08:00
|
|
|
#include "clang/Analysis/AnalysisDeclContext.h"
|
2012-02-01 03:33:39 +08:00
|
|
|
#include "clang/Basic/TargetInfo.h"
|
|
|
|
#include "clang/Basic/TypeTraits.h"
|
|
|
|
#include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h"
|
2012-12-04 17:13:33 +08:00
|
|
|
#include "clang/StaticAnalyzer/Core/Checker.h"
|
2012-02-01 03:33:39 +08:00
|
|
|
#include "clang/StaticAnalyzer/Core/PathSensitive/AnalysisManager.h"
|
|
|
|
#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
|
2012-02-04 21:45:25 +08:00
|
|
|
#include "llvm/ADT/SmallString.h"
|
2012-02-01 03:33:39 +08:00
|
|
|
#include "llvm/Support/raw_ostream.h"
|
|
|
|
|
|
|
|
using namespace clang;
|
|
|
|
using namespace ento;
|
|
|
|
|
|
|
|
namespace {
|
|
|
|
class WalkAST: public StmtVisitor<WalkAST> {
|
2014-02-12 05:49:21 +08:00
|
|
|
const CheckerBase *Checker;
|
2012-02-01 03:33:39 +08:00
|
|
|
BugReporter &BR;
|
|
|
|
AnalysisDeclContext* AC;
|
|
|
|
|
|
|
|
/// Check if two expressions refer to the same declaration.
|
2017-02-02 16:20:54 +08:00
|
|
|
bool sameDecl(const Expr *A1, const Expr *A2) {
|
|
|
|
if (const auto *D1 = dyn_cast<DeclRefExpr>(A1->IgnoreParenCasts()))
|
|
|
|
if (const auto *D2 = dyn_cast<DeclRefExpr>(A2->IgnoreParenCasts()))
|
2012-02-01 03:33:39 +08:00
|
|
|
return D1->getDecl() == D2->getDecl();
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Check if the expression E is a sizeof(WithArg).
|
2017-02-02 16:20:54 +08:00
|
|
|
bool isSizeof(const Expr *E, const Expr *WithArg) {
|
|
|
|
if (const auto *UE = dyn_cast<UnaryExprOrTypeTraitExpr>(E))
|
|
|
|
if (UE->getKind() == UETT_SizeOf && !UE->isArgumentType())
|
2012-02-01 03:33:39 +08:00
|
|
|
return sameDecl(UE->getArgumentExpr(), WithArg);
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Check if the expression E is a strlen(WithArg).
|
2017-02-02 16:20:54 +08:00
|
|
|
bool isStrlen(const Expr *E, const Expr *WithArg) {
|
|
|
|
if (const auto *CE = dyn_cast<CallExpr>(E)) {
|
2012-02-01 03:33:39 +08:00
|
|
|
const FunctionDecl *FD = CE->getDirectCallee();
|
|
|
|
if (!FD)
|
|
|
|
return false;
|
2012-11-03 07:49:24 +08:00
|
|
|
return (CheckerContext::isCLibraryFunction(FD, "strlen") &&
|
|
|
|
sameDecl(CE->getArg(0), WithArg));
|
2012-02-01 03:33:39 +08:00
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Check if the expression is an integer literal with value 1.
|
2017-02-02 16:20:54 +08:00
|
|
|
bool isOne(const Expr *E) {
|
|
|
|
if (const auto *IL = dyn_cast<IntegerLiteral>(E))
|
2012-02-01 03:33:39 +08:00
|
|
|
return (IL->getValue().isIntN(1));
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2017-02-02 16:20:54 +08:00
|
|
|
StringRef getPrintableName(const Expr *E) {
|
|
|
|
if (const auto *D = dyn_cast<DeclRefExpr>(E->IgnoreParenCasts()))
|
2012-02-01 03:33:39 +08:00
|
|
|
return D->getDecl()->getName();
|
|
|
|
return StringRef();
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Identify erroneous patterns in the last argument to strncat - the number
|
|
|
|
/// of bytes to copy.
|
|
|
|
bool containsBadStrncatPattern(const CallExpr *CE);
|
|
|
|
|
2018-07-20 05:50:03 +08:00
|
|
|
/// Identify erroneous patterns in the last argument to strlcpy - the number
|
|
|
|
/// of bytes to copy.
|
|
|
|
/// The bad pattern checked is when the size is known
|
|
|
|
/// to be larger than the destination can handle.
|
|
|
|
/// char dst[2];
|
|
|
|
/// size_t cpy = 4;
|
|
|
|
/// strlcpy(dst, "abcd", sizeof("abcd") - 1);
|
|
|
|
/// strlcpy(dst, "abcd", 4);
|
2018-07-24 02:26:38 +08:00
|
|
|
/// strlcpy(dst + 3, "abcd", 2);
|
2018-07-20 05:50:03 +08:00
|
|
|
/// strlcpy(dst, "abcd", cpy);
|
2018-09-23 16:30:17 +08:00
|
|
|
/// Identify erroneous patterns in the last argument to strlcat - the number
|
|
|
|
/// of bytes to copy.
|
|
|
|
/// The bad pattern checked is when the last argument is basically
|
|
|
|
/// pointing to the destination buffer size or argument larger or
|
|
|
|
/// equal to.
|
|
|
|
/// char dst[2];
|
|
|
|
/// strlcat(dst, src2, sizeof(dst));
|
|
|
|
/// strlcat(dst, src2, 2);
|
|
|
|
/// strlcat(dst, src2, 10);
|
|
|
|
bool containsBadStrlcpyStrlcatPattern(const CallExpr *CE);
|
2018-07-20 05:50:03 +08:00
|
|
|
|
2012-02-01 03:33:39 +08:00
|
|
|
public:
|
2017-02-02 16:20:54 +08:00
|
|
|
WalkAST(const CheckerBase *Checker, BugReporter &BR, AnalysisDeclContext *AC)
|
|
|
|
: Checker(Checker), BR(BR), AC(AC) {}
|
2012-02-01 03:33:39 +08:00
|
|
|
|
|
|
|
// Statement visitor methods.
|
|
|
|
void VisitChildren(Stmt *S);
|
|
|
|
void VisitStmt(Stmt *S) {
|
|
|
|
VisitChildren(S);
|
|
|
|
}
|
|
|
|
void VisitCallExpr(CallExpr *CE);
|
|
|
|
};
|
|
|
|
} // end anonymous namespace
|
|
|
|
|
|
|
|
// The correct size argument should look like following:
|
|
|
|
// strncat(dst, src, sizeof(dst) - strlen(dest) - 1);
|
|
|
|
// We look for the following anti-patterns:
|
|
|
|
// - strncat(dst, src, sizeof(dst) - strlen(dst));
|
|
|
|
// - strncat(dst, src, sizeof(dst) - 1);
|
|
|
|
// - strncat(dst, src, sizeof(dst));
|
|
|
|
bool WalkAST::containsBadStrncatPattern(const CallExpr *CE) {
|
2013-04-11 06:06:29 +08:00
|
|
|
if (CE->getNumArgs() != 3)
|
|
|
|
return false;
|
2012-02-01 03:33:39 +08:00
|
|
|
const Expr *DstArg = CE->getArg(0);
|
|
|
|
const Expr *SrcArg = CE->getArg(1);
|
|
|
|
const Expr *LenArg = CE->getArg(2);
|
|
|
|
|
|
|
|
// Identify wrong size expressions, which are commonly used instead.
|
2017-02-02 16:20:54 +08:00
|
|
|
if (const auto *BE = dyn_cast<BinaryOperator>(LenArg->IgnoreParenCasts())) {
|
2012-02-01 03:33:39 +08:00
|
|
|
// - sizeof(dst) - strlen(dst)
|
|
|
|
if (BE->getOpcode() == BO_Sub) {
|
|
|
|
const Expr *L = BE->getLHS();
|
|
|
|
const Expr *R = BE->getRHS();
|
|
|
|
if (isSizeof(L, DstArg) && isStrlen(R, DstArg))
|
|
|
|
return true;
|
|
|
|
|
|
|
|
// - sizeof(dst) - 1
|
|
|
|
if (isSizeof(L, DstArg) && isOne(R->IgnoreParenCasts()))
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
// - sizeof(dst)
|
|
|
|
if (isSizeof(LenArg, DstArg))
|
|
|
|
return true;
|
|
|
|
|
|
|
|
// - sizeof(src)
|
|
|
|
if (isSizeof(LenArg, SrcArg))
|
|
|
|
return true;
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2018-09-23 16:30:17 +08:00
|
|
|
bool WalkAST::containsBadStrlcpyStrlcatPattern(const CallExpr *CE) {
|
2018-07-20 05:50:03 +08:00
|
|
|
if (CE->getNumArgs() != 3)
|
|
|
|
return false;
|
|
|
|
const Expr *DstArg = CE->getArg(0);
|
|
|
|
const Expr *LenArg = CE->getArg(2);
|
|
|
|
|
2018-07-21 04:39:49 +08:00
|
|
|
const auto *DstArgDecl = dyn_cast<DeclRefExpr>(DstArg->IgnoreParenImpCasts());
|
2018-07-20 05:50:03 +08:00
|
|
|
const auto *LenArgDecl = dyn_cast<DeclRefExpr>(LenArg->IgnoreParenLValueCasts());
|
2018-07-24 02:26:38 +08:00
|
|
|
uint64_t DstOff = 0;
|
2018-09-23 16:30:17 +08:00
|
|
|
if (isSizeof(LenArg, DstArg))
|
|
|
|
return false;
|
2018-07-20 05:50:03 +08:00
|
|
|
// - size_t dstlen = sizeof(dst)
|
|
|
|
if (LenArgDecl) {
|
|
|
|
const auto *LenArgVal = dyn_cast<VarDecl>(LenArgDecl->getDecl());
|
|
|
|
if (LenArgVal->getInit())
|
2018-07-20 16:19:20 +08:00
|
|
|
LenArg = LenArgVal->getInit();
|
2018-07-20 05:50:03 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
// - integral value
|
|
|
|
// We try to figure out if the last argument is possibly longer
|
2018-07-24 02:26:38 +08:00
|
|
|
// than the destination can possibly handle if its size can be defined.
|
2018-07-21 04:39:49 +08:00
|
|
|
if (const auto *IL = dyn_cast<IntegerLiteral>(LenArg->IgnoreParenImpCasts())) {
|
2018-07-20 05:50:03 +08:00
|
|
|
uint64_t ILRawVal = IL->getValue().getZExtValue();
|
2018-07-24 02:26:38 +08:00
|
|
|
|
|
|
|
// Case when there is pointer arithmetic on the destination buffer
|
|
|
|
// especially when we offset from the base decreasing the
|
|
|
|
// buffer length accordingly.
|
|
|
|
if (!DstArgDecl) {
|
|
|
|
if (const auto *BE = dyn_cast<BinaryOperator>(DstArg->IgnoreParenImpCasts())) {
|
|
|
|
DstArgDecl = dyn_cast<DeclRefExpr>(BE->getLHS()->IgnoreParenImpCasts());
|
|
|
|
if (BE->getOpcode() == BO_Add) {
|
|
|
|
if ((IL = dyn_cast<IntegerLiteral>(BE->getRHS()->IgnoreParenImpCasts()))) {
|
|
|
|
DstOff = IL->getValue().getZExtValue();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2018-07-21 04:39:49 +08:00
|
|
|
if (DstArgDecl) {
|
|
|
|
if (const auto *Buffer = dyn_cast<ConstantArrayType>(DstArgDecl->getType())) {
|
|
|
|
ASTContext &C = BR.getContext();
|
|
|
|
uint64_t BufferLen = C.getTypeSize(Buffer) / 8;
|
2018-09-23 16:30:17 +08:00
|
|
|
auto RemainingBufferLen = BufferLen - DstOff;
|
2019-02-09 07:59:52 +08:00
|
|
|
if (RemainingBufferLen < ILRawVal)
|
|
|
|
return true;
|
2018-07-21 04:39:49 +08:00
|
|
|
}
|
2018-07-20 05:50:03 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2012-02-01 03:33:39 +08:00
|
|
|
/// Inspect a call expression and emit an "Anti-pattern in the argument"
/// report when it is a strncat, strlcpy or strlcat call whose size argument
/// matches one of the known bad patterns. The call's children are always
/// visited afterwards so that nested calls are checked too.
void WalkAST::VisitCallExpr(CallExpr *CE) {
  // Calls without a direct callee (e.g. through a function pointer) cannot be
  // matched by name, but their arguments may still contain interesting calls,
  // so we must not bail out before recursing below.
  if (const FunctionDecl *FD = CE->getDirectCallee()) {
    const bool IsStrncat = CheckerContext::isCLibraryFunction(FD, "strncat");
    const bool IsStrlcpyOrStrlcat =
        !IsStrncat && (CheckerContext::isCLibraryFunction(FD, "strlcpy") ||
                       CheckerContext::isCLibraryFunction(FD, "strlcat"));

    const bool IsBad =
        (IsStrncat && containsBadStrncatPattern(CE)) ||
        (IsStrlcpyOrStrlcat && containsBadStrlcpyStrlcatPattern(CE));

    if (IsBad) {
      // Both pattern matchers only succeed on three-argument calls, so the
      // argument accesses below are in range.
      const Expr *DstArg = CE->getArg(0);
      const Expr *LenArg = CE->getArg(2);
      PathDiagnosticLocation Loc = PathDiagnosticLocation::createBegin(
          LenArg, BR.getSourceManager(), AC);

      StringRef DstName = getPrintableName(DstArg);

      SmallString<256> S;
      llvm::raw_svector_ostream os(S);
      if (IsStrncat) {
        os << "Potential buffer overflow. ";
        if (!DstName.empty()) {
          os << "Replace with 'sizeof(" << DstName << ") "
                "- strlen(" << DstName << ") - 1'";
          os << " or u";
        } else
          os << "U";
        os << "se a safer 'strlcat' API";
      } else {
        os << "The third argument allows to potentially copy more bytes than it should. ";
        os << "Replace with the value ";
        if (!DstName.empty())
          os << "sizeof(" << DstName << ")";
        else
          os << "sizeof(<destination buffer>)";
        os << " or lower";
      }

      BR.EmitBasicReport(FD, Checker, "Anti-pattern in the argument",
                         "C String API", os.str(), Loc,
                         LenArg->getSourceRange());
    }
  }

  // Recurse and check children.
  VisitChildren(CE);
}
|
|
|
|
|
|
|
|
/// Dispatch the visitor on every non-null child of S.
void WalkAST::VisitChildren(Stmt *S) {
  for (Stmt *Sub : S->children()) {
    // Child slots may be null; skip those.
    if (!Sub)
      continue;
    Visit(Sub);
  }
}
|
|
|
|
|
|
|
|
namespace {
|
|
|
|
class CStringSyntaxChecker: public Checker<check::ASTCodeBody> {
|
|
|
|
public:
|
|
|
|
|
|
|
|
void checkASTCodeBody(const Decl *D, AnalysisManager& Mgr,
|
|
|
|
BugReporter &BR) const {
|
2014-02-12 05:49:21 +08:00
|
|
|
WalkAST walker(this, BR, Mgr.getAnalysisDeclContext(D));
|
2012-02-01 03:33:39 +08:00
|
|
|
walker.Visit(D->getBody());
|
|
|
|
}
|
|
|
|
};
|
2015-06-23 07:07:51 +08:00
|
|
|
}
|
2012-02-01 03:33:39 +08:00
|
|
|
|
|
|
|
/// Register CStringSyntaxChecker with the given checker manager.
void ento::registerCStringSyntaxChecker(CheckerManager &mgr) {
  mgr.registerChecker<CStringSyntaxChecker>();
}
|
|
|
|
|
2019-01-26 22:23:08 +08:00
|
|
|
/// Registration predicate for CStringSyntaxChecker: always true, the language
/// options are not consulted.
bool ento::shouldRegisterCStringSyntaxChecker(const LangOptions &LO) {
  return true;
}
|