For printf format string checking, add support for positional format strings.

Along the way, coelesce some of the diagnostics.

llvm-svn: 97297
This commit is contained in:
Ted Kremenek 2010-02-27 01:41:03 +00:00
parent 89a56c561f
commit d166819c26
5 changed files with 234 additions and 50 deletions

View File

@ -150,13 +150,17 @@ enum LengthModifier {
class OptionalAmount {
public:
enum HowSpecified { NotSpecified, Constant, Arg };
enum HowSpecified { NotSpecified, Constant, Arg, Invalid };
OptionalAmount(HowSpecified h, unsigned i, const char *st)
: start(st), hs(h), amt(i) {}
OptionalAmount()
: start(0), hs(NotSpecified), amt(0) {}
OptionalAmount(bool b = true)
: start(0), hs(b ? NotSpecified : Invalid), amt(0) {}
bool isInvalid() const {
return hs == Invalid;
}
HowSpecified getHowSpecified() const { return hs; }
@ -191,6 +195,7 @@ class FormatSpecifier {
unsigned HasSpacePrefix : 1;
unsigned HasAlternativeForm : 1;
unsigned HasLeadingZeroes : 1;
unsigned UsesPositionalArg : 1;
unsigned argIndex;
ConversionSpecifier CS;
OptionalAmount FieldWidth;
@ -198,7 +203,8 @@ class FormatSpecifier {
public:
FormatSpecifier() : LM(None),
IsLeftJustified(0), HasPlusPrefix(0), HasSpacePrefix(0),
HasAlternativeForm(0), HasLeadingZeroes(0), argIndex(0) {}
HasAlternativeForm(0), HasLeadingZeroes(0), UsesPositionalArg(0),
argIndex(0) {}
static FormatSpecifier Parse(const char *beg, const char *end);
@ -214,6 +220,7 @@ public:
void setHasSpacePrefix() { HasSpacePrefix = 1; }
void setHasAlternativeForm() { HasAlternativeForm = 1; }
void setHasLeadingZeros() { HasLeadingZeroes = 1; }
void setUsesPositionalArg() { UsesPositionalArg = 1; }
void setArgIndex(unsigned i) {
assert(CS.consumesDataArgument());
@ -263,8 +270,11 @@ public:
bool hasAlternativeForm() const { return (bool) HasAlternativeForm; }
bool hasLeadingZeros() const { return (bool) HasLeadingZeroes; }
bool hasSpacePrefix() const { return (bool) HasSpacePrefix; }
bool usesPositionalArg() const { return (bool) UsesPositionalArg; }
};
enum PositionContext { FieldWidthPos = 0, PrecisionPos = 1 };
class FormatStringHandler {
public:
FormatStringHandler() {}
@ -275,6 +285,11 @@ public:
virtual void HandleNullChar(const char *nullCharacter) {}
virtual void HandleInvalidPosition(const char *startPos, unsigned posLen,
PositionContext p) {}
virtual void HandleZeroPosition(const char *startPos, unsigned posLen) {}
virtual bool
HandleInvalidConversionSpecifier(const analyze_printf::FormatSpecifier &FS,
const char *startSpecifier,

View File

@ -2534,6 +2534,15 @@ def warn_printf_missing_format_string : Warning<
def warn_printf_conversion_argument_type_mismatch : Warning<
"conversion specifies type %0 but the argument has type %1">,
InGroup<Format>;
def warn_printf_zero_positional_specifier : Warning<
"position arguments in format strings start counting at 1 (not 0)">,
InGroup<Format>;
def warn_printf_invalid_positional_specifier : Warning<
"invalid position specified for %select{field width|field precision}0">,
InGroup<Format>;
def warn_printf_mix_positional_nonpositional_args : Warning<
"cannot mix positional and non-positional arguments in format string">,
InGroup<Format>;
def warn_null_arg : Warning<
"null passed to a callee which requires a non-null argument">,
InGroup<NonNull>;
@ -2543,15 +2552,10 @@ def warn_printf_format_string_is_wide_literal : Warning<
"format string should not be a wide string">, InGroup<Format>;
def warn_printf_format_string_contains_null_char : Warning<
"format string contains '\\0' within the string body">, InGroup<Format>;
def warn_printf_asterisk_width_missing_arg : Warning<
"'*' specified field width is missing a matching 'int' argument">;
def warn_printf_asterisk_precision_missing_arg : Warning<
"'.*' specified field precision is missing a matching 'int' argument">;
def warn_printf_asterisk_width_wrong_type : Warning<
"field width should have type %0, but argument has type %1">,
InGroup<Format>;
def warn_printf_asterisk_precision_wrong_type : Warning<
"field precision should have type %0, but argument has type %1">,
def warn_printf_asterisk_missing_arg : Warning<
"'%select{*|.*}0' specified field %select{width|precision}0 is missing a matching 'int' argument">;
def warn_printf_asterisk_wrong_type : Warning<
"field %select{width|precision}0 should have type %1, but argument has type %2">,
InGroup<Format>;
def warn_printf_nonsensical_precision: Warning<
"precision used in '%0' conversion specifier (where it has no meaning)">,

View File

@ -15,10 +15,12 @@
#include "clang/Analysis/Analyses/PrintfFormatString.h"
#include "clang/AST/ASTContext.h"
using clang::analyze_printf::FormatSpecifier;
using clang::analyze_printf::OptionalAmount;
using clang::analyze_printf::ArgTypeResult;
using clang::analyze_printf::FormatSpecifier;
using clang::analyze_printf::FormatStringHandler;
using clang::analyze_printf::OptionalAmount;
using clang::analyze_printf::PositionContext;
using namespace clang;
namespace {
@ -62,36 +64,141 @@ public:
// Methods for parsing format strings.
//===----------------------------------------------------------------------===//
static OptionalAmount ParseAmount(const char *&Beg, const char *E,
unsigned &argIndex) {
static OptionalAmount ParseAmount(const char *&Beg, const char *E) {
const char *I = Beg;
UpdateOnReturn <const char*> UpdateBeg(Beg, I);
bool foundDigits = false;
unsigned accumulator = 0;
for ( ; I != E; ++I) {
char c = *I;
if (c >= '0' && c <= '9') {
foundDigits = true;
// Ignore '0' on the first character.
if (c == '0' && I == Beg)
break;
accumulator += (accumulator * 10) + (c - '0');
continue;
}
if (foundDigits)
if (accumulator)
return OptionalAmount(OptionalAmount::Constant, accumulator, Beg);
if (c == '*') {
++I;
return OptionalAmount(OptionalAmount::Arg, argIndex++, Beg);
}
break;
}
return OptionalAmount();
}
static OptionalAmount ParseNonPositionAmount(const char *&Beg, const char *E,
unsigned &argIndex) {
if (*Beg == '*') {
++Beg;
return OptionalAmount(OptionalAmount::Arg, argIndex++, Beg);
}
return ParseAmount(Beg, E);
}
static OptionalAmount ParsePositionAmount(FormatStringHandler &H,
const char *Start,
const char *&Beg, const char *E,
PositionContext p) {
if (*Beg == '*') {
const char *I = Beg + 1;
const OptionalAmount &Amt = ParseAmount(I, E);
if (Amt.getHowSpecified() == OptionalAmount::NotSpecified) {
H.HandleInvalidPosition(Beg, I - Beg, p);
return OptionalAmount(false);
}
if (I== E) {
// No more characters left?
H.HandleIncompleteFormatSpecifier(Start, E - Start);
return OptionalAmount(false);
}
if (*I == '$') {
const char *Tmp = Beg;
Beg = ++I;
return OptionalAmount(OptionalAmount::Arg, Amt.getConstantAmount() - 1,
Tmp);
}
H.HandleInvalidPosition(Beg, I - Beg, p);
return OptionalAmount(false);
}
return ParseAmount(Beg, E);
}
static bool ParsePrecision(FormatStringHandler &H, FormatSpecifier &FS,
const char *Start, const char *&Beg, const char *E,
unsigned *argIndex) {
if (argIndex) {
FS.setPrecision(ParseNonPositionAmount(Beg, E, *argIndex));
}
else {
const OptionalAmount Amt = ParsePositionAmount(H, Start, Beg, E,
analyze_printf::PrecisionPos);
if (Amt.isInvalid())
return true;
FS.setPrecision(Amt);
}
return false;
}
static bool ParseFieldWidth(FormatStringHandler &H, FormatSpecifier &FS,
const char *Start, const char *&Beg, const char *E,
unsigned *argIndex) {
// FIXME: Support negative field widths.
if (argIndex) {
FS.setFieldWidth(ParseNonPositionAmount(Beg, E, *argIndex));
}
else {
const OptionalAmount Amt = ParsePositionAmount(H, Start, Beg, E,
analyze_printf::FieldWidthPos);
if (Amt.isInvalid())
return true;
FS.setFieldWidth(Amt);
}
return false;
}
static bool ParseArgPosition(FormatStringHandler &H,
FormatSpecifier &FS, const char *Start,
const char *&Beg, const char *E) {
using namespace clang::analyze_printf;
const char *I = Beg;
const OptionalAmount &Amt = ParseAmount(I, E);
if (I == E) {
// No more characters left?
H.HandleIncompleteFormatSpecifier(Start, E - Start);
return true;
}
if (Amt.getHowSpecified() == OptionalAmount::Constant && *(I++) == '$') {
FS.setArgIndex(Amt.getConstantAmount() - 1);
FS.setUsesPositionalArg();
// Update the caller's pointer if we decided to consume
// these characters.
Beg = I;
return false;
}
// Special case: '%0$', since this is an easy mistake.
if (*I == '0' && (I+1) != E && *(I+1) == '$') {
H.HandleZeroPosition(Start, I - Start + 2);
return true;
}
return false;
}
static FormatSpecifierResult ParseFormatSpecifier(FormatStringHandler &H,
const char *&Beg,
const char *E,
@ -128,6 +235,14 @@ static FormatSpecifierResult ParseFormatSpecifier(FormatStringHandler &H,
}
FormatSpecifier FS;
if (ParseArgPosition(H, FS, Start, I, E))
return true;
if (I == E) {
// No more characters left?
H.HandleIncompleteFormatSpecifier(Start, E - Start);
return true;
}
// Look for flags (if any).
bool hasMore = true;
@ -151,7 +266,9 @@ static FormatSpecifierResult ParseFormatSpecifier(FormatStringHandler &H,
}
// Look for the field width (if any).
FS.setFieldWidth(ParseAmount(I, E, argIndex));
if (ParseFieldWidth(H, FS, Start, I, E,
FS.usesPositionalArg() ? 0 : &argIndex))
return true;
if (I == E) {
// No more characters left?
@ -167,7 +284,9 @@ static FormatSpecifierResult ParseFormatSpecifier(FormatStringHandler &H,
return true;
}
FS.setPrecision(ParseAmount(I, E, argIndex));
if (ParsePrecision(H, FS, Start, I, E,
FS.usesPositionalArg() ? 0 : &argIndex))
return true;
if (I == E) {
// No more characters left?
@ -245,7 +364,7 @@ static FormatSpecifierResult ParseFormatSpecifier(FormatStringHandler &H,
}
ConversionSpecifier CS(conversionPosition, k);
FS.setConversionSpecifier(CS);
if (CS.consumesDataArgument())
if (CS.consumesDataArgument() && !FS.usesPositionalArg())
FS.setArgIndex(argIndex++);
if (k == ConversionSpecifier::InvalidSpecifier) {

View File

@ -110,7 +110,8 @@ bool Sema::CheckablePrintfAttr(const FormatAttr *Format, CallExpr *TheCall) {
}
if (format_idx < TheCall->getNumArgs()) {
Expr *Format = TheCall->getArg(format_idx)->IgnoreParenCasts();
if (!Format->isNullPointerConstant(Context, Expr::NPC_ValueDependentIsNull))
if (!Format->isNullPointerConstant(Context,
Expr::NPC_ValueDependentIsNull))
return true;
}
}
@ -1051,6 +1052,8 @@ class CheckPrintfHandler : public analyze_printf::FormatStringHandler {
const CallExpr *TheCall;
unsigned FormatIdx;
llvm::BitVector CoveredArgs;
bool usesPositionalArgs;
bool atFirstArg;
public:
CheckPrintfHandler(Sema &s, const StringLiteral *fexpr,
const Expr *origFormatExpr,
@ -1061,7 +1064,8 @@ public:
NumDataArgs(numDataArgs),
IsObjCLiteral(isObjCLiteral), Beg(beg),
HasVAListArg(hasVAListArg),
TheCall(theCall), FormatIdx(formatIdx) {
TheCall(theCall), FormatIdx(formatIdx),
usesPositionalArgs(false), atFirstArg(true) {
CoveredArgs.resize(numDataArgs);
CoveredArgs.reset();
}
@ -1076,6 +1080,12 @@ public:
const char *startSpecifier,
unsigned specifierLen);
virtual void HandleInvalidPosition(const char *startSpecifier,
unsigned specifierLen,
analyze_printf::PositionContext p);
virtual void HandleZeroPosition(const char *startPos, unsigned posLen);
void HandleNullChar(const char *nullCharacter);
bool HandleFormatSpecifier(const analyze_printf::FormatSpecifier &FS,
@ -1087,9 +1097,8 @@ private:
unsigned specifierLen);
SourceLocation getLocationOfByte(const char *x);
bool HandleAmount(const analyze_printf::OptionalAmount &Amt,
unsigned MissingArgDiag, unsigned BadTypeDiag,
const char *startSpecifier, unsigned specifierLen);
bool HandleAmount(const analyze_printf::OptionalAmount &Amt, unsigned k,
const char *startSpecifier, unsigned specifierLen);
void HandleFlags(const analyze_printf::FormatSpecifier &FS,
llvm::StringRef flag, llvm::StringRef cspec,
const char *startSpecifier, unsigned specifierLen);
@ -1120,6 +1129,21 @@ HandleIncompleteFormatSpecifier(const char *startSpecifier,
<< getFormatSpecifierRange(startSpecifier, specifierLen);
}
void
CheckPrintfHandler::HandleInvalidPosition(const char *startPos, unsigned posLen,
analyze_printf::PositionContext p) {
SourceLocation Loc = getLocationOfByte(startPos);
S.Diag(Loc, diag::warn_printf_invalid_positional_specifier)
<< (unsigned) p << getFormatSpecifierRange(startPos, posLen);
}
void CheckPrintfHandler::HandleZeroPosition(const char *startPos,
unsigned posLen) {
SourceLocation Loc = getLocationOfByte(startPos);
S.Diag(Loc, diag::warn_printf_zero_positional_specifier)
<< getFormatSpecifierRange(startPos, posLen);
}
bool CheckPrintfHandler::
HandleInvalidConversionSpecifier(const analyze_printf::FormatSpecifier &FS,
const char *startSpecifier,
@ -1176,17 +1200,16 @@ void CheckPrintfHandler::HandleFlags(const analyze_printf::FormatSpecifier &FS,
bool
CheckPrintfHandler::HandleAmount(const analyze_printf::OptionalAmount &Amt,
unsigned MissingArgDiag,
unsigned BadTypeDiag,
const char *startSpecifier,
unsigned k, const char *startSpecifier,
unsigned specifierLen) {
if (Amt.hasDataArgument()) {
if (!HasVAListArg) {
unsigned argIndex = Amt.getArgIndex();
if (argIndex >= NumDataArgs) {
S.Diag(getLocationOfByte(Amt.getStart()), MissingArgDiag)
<< getFormatSpecifierRange(startSpecifier, specifierLen);
S.Diag(getLocationOfByte(Amt.getStart()),
diag::warn_printf_asterisk_missing_arg)
<< k << getFormatSpecifierRange(startSpecifier, specifierLen);
// Don't do any more checking. We will just emit
// spurious errors.
return false;
@ -1204,7 +1227,9 @@ CheckPrintfHandler::HandleAmount(const analyze_printf::OptionalAmount &Amt,
assert(ATR.isValid());
if (!ATR.matchesType(S.Context, T)) {
S.Diag(getLocationOfByte(Amt.getStart()), BadTypeDiag)
S.Diag(getLocationOfByte(Amt.getStart()),
diag::warn_printf_asterisk_wrong_type)
<< k
<< ATR.getRepresentativeType(S.Context) << T
<< getFormatSpecifierRange(startSpecifier, specifierLen)
<< Arg->getSourceRange();
@ -1223,22 +1248,30 @@ CheckPrintfHandler::HandleFormatSpecifier(const analyze_printf::FormatSpecifier
const char *startSpecifier,
unsigned specifierLen) {
using namespace analyze_printf;
using namespace analyze_printf;
const ConversionSpecifier &CS = FS.getConversionSpecifier();
// First check if the field width, precision, and conversion specifier
// have matching data arguments.
if (!HandleAmount(FS.getFieldWidth(),
diag::warn_printf_asterisk_width_missing_arg,
diag::warn_printf_asterisk_width_wrong_type,
startSpecifier, specifierLen)) {
if (atFirstArg) {
atFirstArg = false;
usesPositionalArgs = FS.usesPositionalArg();
}
else if (usesPositionalArgs != FS.usesPositionalArg()) {
// Cannot mix-and-match positional and non-positional arguments.
S.Diag(getLocationOfByte(CS.getStart()),
diag::warn_printf_mix_positional_nonpositional_args)
<< getFormatSpecifierRange(startSpecifier, specifierLen);
return false;
}
if (!HandleAmount(FS.getPrecision(),
diag::warn_printf_asterisk_precision_missing_arg,
diag::warn_printf_asterisk_precision_wrong_type,
startSpecifier, specifierLen)) {
// First check if the field width, precision, and conversion specifier
// have matching data arguments.
if (!HandleAmount(FS.getFieldWidth(), /* field width */ 0,
startSpecifier, specifierLen)) {
return false;
}
if (!HandleAmount(FS.getPrecision(), /* precision */ 1,
startSpecifier, specifierLen)) {
return false;
}

View File

@ -221,3 +221,16 @@ void test_unicode_conversions(wchar_t *s) {
printf("%S", "hello"); // expected-warning{{but the argument has type 'char *'}}
}
// Mac OS X supports positional arguments in format strings.
// This is an IEEE extension (IEEE Std 1003.1).
// FIXME: This is probably not portable everywhere.
void test_positional_arguments() {
printf("%0$", (int)2); // expected-warning{{position arguments in format strings start counting at 1 (not 0)}}
printf("%1$d", (int) 2); // no-warning
printf("%1$d", (int) 2, 2); // expected-warning{{data argument not used by format string}}
printf("%1$d%1$f", (int) 2); // expected-warning{{conversion specifies type 'double' but the argument has type 'int'}}
printf("%1$2.2d", (int) 2); // no-warning
printf("%2$*1$.2d", (int) 2, (int) 3); // no-warning
printf("%2$*8$d", (int) 2, (int) 3); // expected-warning{{specified field width is missing a matching 'int' argument}}
}