forked from OSchip/llvm-project
initial support for checking format strings, patch by Ted Kremenek:
"I've coded up some support in clang to flag warnings for non-constant format strings used in calls to printf-like functions (all the functions listed in "man fprintf"). Non-constant format strings are a source of many security exploits in C/C++ programs, and I believe are currently detected by gcc using the flag -Wformat-nonliteral." llvm-svn: 41003
This commit is contained in:
parent
d79671fdf2
commit
b87b1b36ee
|
@ -16,10 +16,26 @@
|
|||
#include "clang/AST/ASTContext.h"
|
||||
#include "clang/Lex/Preprocessor.h"
|
||||
#include "clang/Basic/Diagnostic.h"
|
||||
|
||||
using namespace clang;
|
||||
|
||||
Sema::Sema(Preprocessor &pp, ASTContext &ctxt, std::vector<Decl*> &prevInGroup)
|
||||
: PP(pp), Context(ctxt), CurFunctionDecl(0), LastInGroupList(prevInGroup) {
|
||||
|
||||
// Get IdentifierInfo objects for known functions for which we
|
||||
// do extra checking.
|
||||
IdentifierTable& IT = PP.getIdentifierTable();
|
||||
|
||||
KnownFunctionIDs[ id_printf ] = &IT.get("printf");
|
||||
KnownFunctionIDs[ id_fprintf ] = &IT.get("fprintf");
|
||||
KnownFunctionIDs[ id_sprintf ] = &IT.get("sprintf");
|
||||
KnownFunctionIDs[ id_snprintf ] = &IT.get("snprintf");
|
||||
KnownFunctionIDs[ id_vsnprintf ] = &IT.get("vsnprintf");
|
||||
KnownFunctionIDs[ id_asprintf ] = &IT.get("asprintf");
|
||||
KnownFunctionIDs[ id_vasprintf ] = &IT.get("vasprintf");
|
||||
KnownFunctionIDs[ id_vfprintf ] = &IT.get("vfprintf");
|
||||
KnownFunctionIDs[ id_vsprintf ] = &IT.get("vsprintf");
|
||||
KnownFunctionIDs[ id_vprintf ] = &IT.get("vprintf");
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
|
|
@ -68,6 +68,28 @@ class Sema : public Action {
|
|||
/// us to associate a raw vector type with one of the OCU type names.
|
||||
/// This is only necessary for issuing pretty diagnostics.
|
||||
llvm::SmallVector<TypedefDecl*, 24> OCUVectorDecls;
|
||||
|
||||
// Enum values used by KnownFunctionIDs (see below).
|
||||
enum {
|
||||
id_printf,
|
||||
id_fprintf,
|
||||
id_sprintf,
|
||||
id_snprintf,
|
||||
id_vsnprintf,
|
||||
id_asprintf,
|
||||
id_vasprintf,
|
||||
id_vfprintf,
|
||||
id_vsprintf,
|
||||
id_vprintf,
|
||||
id_num_known_functions
|
||||
};
|
||||
|
||||
/// KnownFunctionIDs - This is a list of IdentifierInfo objects to a set
|
||||
/// of known functions used by the semantic analysis to do various
|
||||
/// kinds of checking (e.g. checking format string errors in printf calls).
|
||||
/// This list is populated upon the creation of a Sema object.
|
||||
IdentifierInfo* KnownFunctionIDs[ id_num_known_functions ];
|
||||
|
||||
public:
|
||||
Sema(Preprocessor &pp, ASTContext &ctxt, std::vector<Decl*> &prevInGroup);
|
||||
|
||||
|
@ -395,7 +417,17 @@ private:
|
|||
/// a constant expression of type int with a value greater than zero. If the
|
||||
/// array has an incomplete type or a valid constant size, return false,
|
||||
/// otherwise emit a diagnostic and return true.
|
||||
bool VerifyConstantArrayType(const ArrayType *ary, SourceLocation loc);
|
||||
bool VerifyConstantArrayType(const ArrayType *ary, SourceLocation loc);
|
||||
|
||||
//===--------------------------------------------------------------------===//
|
||||
// Extra semantic analysis beyond the C type system
|
||||
private:
|
||||
|
||||
void CheckFunctionCall(Expr *Fn, FunctionDecl *FDecl,
|
||||
Expr** Args, unsigned NumArgsInCall);
|
||||
|
||||
void CheckPrintfArguments(Expr *Fn, FunctionDecl *FDecl, unsigned format_idx,
|
||||
Expr** Args, unsigned NumArgsInCall);
|
||||
};
|
||||
|
||||
|
||||
|
|
|
@ -0,0 +1,90 @@
|
|||
//===--- SemaChecking.cpp - Extra Semantic Checking -----------------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file was developed by Ted Kremenek and is distributed under
|
||||
// the University of Illinois Open Source License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file implements extra semantic analysis beyond what is enforced
|
||||
// by the C type system.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "Sema.h"
|
||||
#include "clang/AST/ASTContext.h"
|
||||
#include "clang/AST/Decl.h"
|
||||
#include "clang/AST/Expr.h"
|
||||
#include "clang/Lex/Preprocessor.h"
|
||||
#include "clang/Lex/LiteralSupport.h"
|
||||
#include "clang/Basic/SourceManager.h"
|
||||
#include "clang/Basic/Diagnostic.h"
|
||||
#include "clang/Basic/LangOptions.h"
|
||||
#include "clang/Basic/TargetInfo.h"
|
||||
#include "llvm/ADT/SmallString.h"
|
||||
#include "llvm/ADT/StringExtras.h"
|
||||
using namespace clang;
|
||||
|
||||
/// CheckFunctionCall - Check a direct function call for various correctness
|
||||
/// and safety properties not strictly enforced by the C type system.
|
||||
void
|
||||
Sema::CheckFunctionCall(Expr *Fn, FunctionDecl *FDecl,
|
||||
Expr** Args, unsigned NumArgsInCall) {
|
||||
|
||||
// Get the IdentifierInfo* for the called function.
|
||||
IdentifierInfo *FnInfo = FDecl->getIdentifier();
|
||||
|
||||
// Search the KnownFunctionIDs for the identifier.
|
||||
unsigned i = 0, e = id_num_known_functions;
|
||||
for ( ; i != e; ++i) { if (KnownFunctionIDs[i] == FnInfo) break; }
|
||||
if( i == e ) return;
|
||||
|
||||
// Printf checking.
|
||||
if (i <= id_vprintf) {
|
||||
// Retrieve the index of the format string parameter.
|
||||
unsigned format_idx = 0;
|
||||
switch (i) {
|
||||
default: assert(false && "No format string argument index.");
|
||||
case id_printf: format_idx = 0; break;
|
||||
case id_fprintf: format_idx = 1; break;
|
||||
case id_sprintf: format_idx = 1; break;
|
||||
case id_snprintf: format_idx = 2; break;
|
||||
case id_vsnprintf: format_idx = 2; break;
|
||||
case id_asprintf: format_idx = 1; break;
|
||||
case id_vasprintf: format_idx = 1; break;
|
||||
case id_vfprintf: format_idx = 1; break;
|
||||
case id_vsprintf: format_idx = 1; break;
|
||||
case id_vprintf: format_idx = 1; break;
|
||||
}
|
||||
CheckPrintfArguments(Fn, FDecl, format_idx, Args, NumArgsInCall);
|
||||
}
|
||||
}
|
||||
|
||||
/// CheckPrintfArguments - Check calls to printf (and similar functions) for
|
||||
/// correct use of format strings. Improper format strings to functions in
|
||||
/// the printf family can be the source of bizarre bugs and very serious
|
||||
/// security holes. A good source of information is available in the following
|
||||
/// paper (which includes additional references):
|
||||
///
|
||||
/// FormatGuard: Automatic Protection From printf Format String
|
||||
/// Vulnerabilities, Proceedings of the 10th USENIX Security Symposium, 2001.
|
||||
void
|
||||
Sema::CheckPrintfArguments(Expr *Fn, FunctionDecl *FDecl, unsigned format_idx,
|
||||
Expr** Args, unsigned NumArgsInCall) {
|
||||
|
||||
assert( format_idx < NumArgsInCall );
|
||||
|
||||
// CHECK: format string is not a string literal.
|
||||
//
|
||||
// Dynamically generated format strings are difficult to automatically
|
||||
// vet at compile time. Requiring that format strings are string literals
|
||||
// (1) permits the checking of format strings by the compiler and thereby
|
||||
// (2) can practically remove the source of many format string exploits.
|
||||
|
||||
StringLiteral *FExpr = dyn_cast<StringLiteral>(Args[format_idx]);
|
||||
|
||||
if ( FExpr == NULL )
|
||||
Diag( Args[format_idx]->getLocStart(),
|
||||
diag::warn_printf_not_string_constant, Fn->getSourceRange() );
|
||||
}
|
|
@ -22,6 +22,7 @@
|
|||
#include "clang/Basic/LangOptions.h"
|
||||
#include "clang/Basic/TargetInfo.h"
|
||||
#include "llvm/ADT/SmallString.h"
|
||||
#include "llvm/ADT/StringExtras.h"
|
||||
using namespace clang;
|
||||
|
||||
/// ParseStringLiteral - The specified tokens were lexed as pasted string
|
||||
|
@ -555,6 +556,13 @@ ParseCallExpr(ExprTy *fn, SourceLocation LParenLoc,
|
|||
if (NumArgsInCall != NumArgsInProto && !proto->isVariadic())
|
||||
return true;
|
||||
}
|
||||
|
||||
// Do special checking on direct calls to functions.
|
||||
if (ImplicitCastExpr *IcExpr = dyn_cast<ImplicitCastExpr>(Fn))
|
||||
if (DeclRefExpr *DRExpr = dyn_cast<DeclRefExpr>(IcExpr->getSubExpr()))
|
||||
if (FunctionDecl *FDecl = dyn_cast<FunctionDecl>(DRExpr->getDecl()))
|
||||
CheckFunctionCall(Fn, FDecl, Args, NumArgsInCall);
|
||||
|
||||
return new CallExpr(Fn, Args, NumArgsInCall, resultType, RParenLoc);
|
||||
}
|
||||
|
||||
|
|
|
@ -120,6 +120,7 @@
|
|||
DEF2E95F0C5FBD74000C4259 /* InternalsManual.html in CopyFiles */ = {isa = PBXBuildFile; fileRef = DEF2E95E0C5FBD74000C4259 /* InternalsManual.html */; };
|
||||
DEF2EDA70C6A4252000C4259 /* StmtDumper.cpp in Sources */ = {isa = PBXBuildFile; fileRef = DEF2EDA60C6A4252000C4259 /* StmtDumper.cpp */; };
|
||||
DEF2EFF30C6CDD74000C4259 /* CGAggExpr.cpp in Sources */ = {isa = PBXBuildFile; fileRef = DEF2EFF20C6CDD74000C4259 /* CGAggExpr.cpp */; };
|
||||
DEF2F0100C6CFED5000C4259 /* SemaChecking.cpp in Sources */ = {isa = PBXBuildFile; fileRef = DEF2F00F0C6CFED5000C4259 /* SemaChecking.cpp */; };
|
||||
F0226FD20C18084500141F42 /* TextDiagnosticPrinter.cpp in Sources */ = {isa = PBXBuildFile; fileRef = F0226FD00C18084500141F42 /* TextDiagnosticPrinter.cpp */; };
|
||||
F0226FD30C18084500141F42 /* TextDiagnosticPrinter.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = F0226FD10C18084500141F42 /* TextDiagnosticPrinter.h */; };
|
||||
/* End PBXBuildFile section */
|
||||
|
@ -307,6 +308,7 @@
|
|||
DEF2E95E0C5FBD74000C4259 /* InternalsManual.html */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = text.html; name = InternalsManual.html; path = docs/InternalsManual.html; sourceTree = "<group>"; };
|
||||
DEF2EDA60C6A4252000C4259 /* StmtDumper.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = StmtDumper.cpp; path = AST/StmtDumper.cpp; sourceTree = "<group>"; };
|
||||
DEF2EFF20C6CDD74000C4259 /* CGAggExpr.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = CGAggExpr.cpp; path = CodeGen/CGAggExpr.cpp; sourceTree = "<group>"; };
|
||||
DEF2F00F0C6CFED5000C4259 /* SemaChecking.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = SemaChecking.cpp; path = Sema/SemaChecking.cpp; sourceTree = "<group>"; };
|
||||
F0226FD00C18084500141F42 /* TextDiagnosticPrinter.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = TextDiagnosticPrinter.cpp; path = Driver/TextDiagnosticPrinter.cpp; sourceTree = "<group>"; };
|
||||
F0226FD10C18084500141F42 /* TextDiagnosticPrinter.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = TextDiagnosticPrinter.h; path = Driver/TextDiagnosticPrinter.h; sourceTree = "<group>"; };
|
||||
/* End PBXFileReference section */
|
||||
|
@ -401,6 +403,7 @@
|
|||
DE67E7190C020F4F00F66BC5 /* ASTStreamer.cpp */,
|
||||
DE67E7140C020EDF00F66BC5 /* Sema.h */,
|
||||
DE67E7160C020EE400F66BC5 /* Sema.cpp */,
|
||||
DEF2F00F0C6CFED5000C4259 /* SemaChecking.cpp */,
|
||||
DE67E7120C020ED900F66BC5 /* SemaDecl.cpp */,
|
||||
DE67E7100C020ED400F66BC5 /* SemaExpr.cpp */,
|
||||
DE67E70E0C020ECF00F66BC5 /* SemaExprCXX.cpp */,
|
||||
|
@ -683,6 +686,7 @@
|
|||
DEF2E9340C5FBA02000C4259 /* ASTStreamers.cpp in Sources */,
|
||||
DEF2EDA70C6A4252000C4259 /* StmtDumper.cpp in Sources */,
|
||||
DEF2EFF30C6CDD74000C4259 /* CGAggExpr.cpp in Sources */,
|
||||
DEF2F0100C6CFED5000C4259 /* SemaChecking.cpp in Sources */,
|
||||
);
|
||||
runOnlyForDeploymentPostprocessing = 0;
|
||||
};
|
||||
|
|
|
@ -658,6 +658,10 @@ DIAG(err_typecheck_choose_expr_requires_constant, ERROR,
|
|||
"'__builtin_choose_expr' requires a constant expression")
|
||||
DIAG(warn_unused_expr, WARNING,
|
||||
"expression result unused")
|
||||
|
||||
// Printf checking
|
||||
DIAG(warn_printf_not_string_constant, WARNING,
|
||||
"format string is not a string literal")
|
||||
|
||||
// Statements.
|
||||
DIAG(err_continue_not_in_loop, ERROR,
|
||||
|
@ -690,4 +694,6 @@ DIAG(ext_return_missing_expr, EXTENSION,
|
|||
DIAG(ext_return_has_expr, EXTENSION,
|
||||
"void function '%0' should not return a value")
|
||||
|
||||
|
||||
|
||||
#undef DIAG
|
||||
|
|
|
@ -0,0 +1,23 @@
|
|||
// RUN: clang -parse-ast-check %s
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdarg.h>
|
||||
|
||||
void check_string_literal( FILE* fp, const char* s, char *buf, ... ) {
|
||||
|
||||
char * b;
|
||||
va_list ap;
|
||||
va_start(ap,buf);
|
||||
|
||||
printf(s); // expected-warning {{format string is not a string literal}}
|
||||
vprintf(s,ap); // expected-warning {{format string is not a string liter}}
|
||||
fprintf(fp,s); // expected-warning {{format string is not a string literal}}
|
||||
vfprintf(fp,s,ap); // expected-warning {{format string is not a string lit}}
|
||||
asprintf(&b,s); // expected-warning {{format string is not a string lit}}
|
||||
vasprintf(&b,s,ap); // expected-warning {{format string is not a string lit}}
|
||||
sprintf(buf,s); // expected-warning {{format string is not a string literal}}
|
||||
snprintf(buf,2,s); // expected-warning {{format string is not a string lit}}
|
||||
vsprintf(buf,s,ap); // expected-warning {{format string is not a string lit}}
|
||||
vsnprintf(buf,2,s,ap); // expected-warning {{mat string is not a string lit}}
|
||||
}
|
||||
|
Loading…
Reference in New Issue