For printf format string checking, move the tracking of the data argument index out of

Sema and into analyze_printf::ParseFormatString().  Also use a bitvector to determine
what arguments have been covered (instead of just checking to see if the last argument consumed is the max argument).  This is prep. for support positional arguments (an IEEE extension).

llvm-svn: 97248
This commit is contained in:
Ted Kremenek 2010-02-26 19:18:41 +00:00
parent ed1b0c31a7
commit 4a49d9818b
5 changed files with 102 additions and 54 deletions

View File

@ -152,18 +152,21 @@ class OptionalAmount {
public:
enum HowSpecified { NotSpecified, Constant, Arg };
OptionalAmount(HowSpecified h, const char *st)
: start(st), hs(h), amt(0) {}
OptionalAmount(HowSpecified h, unsigned i, const char *st)
: start(st), hs(h), amt(i) {}
OptionalAmount()
: start(0), hs(NotSpecified), amt(0) {}
OptionalAmount(unsigned i, const char *st)
: start(st), hs(Constant), amt(i) {}
HowSpecified getHowSpecified() const { return hs; }
bool hasDataArgument() const { return hs == Arg; }
unsigned getArgIndex() const {
assert(hasDataArgument());
return amt;
}
unsigned getConstantAmount() const {
assert(hs == Constant);
return amt;
@ -188,14 +191,14 @@ class FormatSpecifier {
unsigned HasSpacePrefix : 1;
unsigned HasAlternativeForm : 1;
unsigned HasLeadingZeroes : 1;
unsigned flags : 5;
unsigned argIndex;
ConversionSpecifier CS;
OptionalAmount FieldWidth;
OptionalAmount Precision;
public:
FormatSpecifier() : LM(None),
IsLeftJustified(0), HasPlusPrefix(0), HasSpacePrefix(0),
HasAlternativeForm(0), HasLeadingZeroes(0) {}
HasAlternativeForm(0), HasLeadingZeroes(0), argIndex(0) {}
static FormatSpecifier Parse(const char *beg, const char *end);
@ -212,6 +215,16 @@ public:
void setHasAlternativeForm() { HasAlternativeForm = 1; }
void setHasLeadingZeros() { HasLeadingZeroes = 1; }
void setArgIndex(unsigned i) {
assert(CS.consumesDataArgument());
argIndex = i;
}
unsigned getArgIndex() const {
assert(CS.consumesDataArgument());
return argIndex;
}
// Methods for querying the format specifier.
const ConversionSpecifier &getConversionSpecifier() const {
@ -262,10 +275,10 @@ public:
virtual void HandleNullChar(const char *nullCharacter) {}
virtual void
virtual bool
HandleInvalidConversionSpecifier(const analyze_printf::FormatSpecifier &FS,
const char *startSpecifier,
unsigned specifierLen) {}
unsigned specifierLen) { return true; }
virtual bool HandleFormatSpecifier(const analyze_printf::FormatSpecifier &FS,
const char *startSpecifier,

View File

@ -2521,8 +2521,8 @@ def warn_printf_write_back : Warning<
InGroup<FormatSecurity>;
def warn_printf_insufficient_data_args : Warning<
"more '%%' conversions than data arguments">, InGroup<Format>;
def warn_printf_too_many_data_args : Warning<
"more data arguments than format specifiers">, InGroup<FormatExtraArgs>;
def warn_printf_data_arg_not_used : Warning<
"data argument not used by format string">, InGroup<FormatExtraArgs>;
def warn_printf_invalid_conversion : Warning<
"invalid conversion specifier '%0'">, InGroup<Format>;
def warn_printf_incomplete_specifier : Warning<

View File

@ -62,7 +62,8 @@ public:
// Methods for parsing format strings.
//===----------------------------------------------------------------------===//
static OptionalAmount ParseAmount(const char *&Beg, const char *E) {
static OptionalAmount ParseAmount(const char *&Beg, const char *E,
unsigned &argIndex) {
const char *I = Beg;
UpdateOnReturn <const char*> UpdateBeg(Beg, I);
@ -78,11 +79,11 @@ static OptionalAmount ParseAmount(const char *&Beg, const char *E) {
}
if (foundDigits)
return OptionalAmount(accumulator, Beg);
return OptionalAmount(OptionalAmount::Constant, accumulator, Beg);
if (c == '*') {
++I;
return OptionalAmount(OptionalAmount::Arg, Beg);
return OptionalAmount(OptionalAmount::Arg, argIndex++, Beg);
}
break;
@ -93,7 +94,8 @@ static OptionalAmount ParseAmount(const char *&Beg, const char *E) {
static FormatSpecifierResult ParseFormatSpecifier(FormatStringHandler &H,
const char *&Beg,
const char *E) {
const char *E,
unsigned &argIndex) {
using namespace clang::analyze_printf;
@ -149,7 +151,7 @@ static FormatSpecifierResult ParseFormatSpecifier(FormatStringHandler &H,
}
// Look for the field width (if any).
FS.setFieldWidth(ParseAmount(I, E));
FS.setFieldWidth(ParseAmount(I, E, argIndex));
if (I == E) {
// No more characters left?
@ -165,7 +167,7 @@ static FormatSpecifierResult ParseFormatSpecifier(FormatStringHandler &H,
return true;
}
FS.setPrecision(ParseAmount(I, E));
FS.setPrecision(ParseAmount(I, E, argIndex));
if (I == E) {
// No more characters left?
@ -241,20 +243,26 @@ static FormatSpecifierResult ParseFormatSpecifier(FormatStringHandler &H,
// Glibc specific.
case 'm': k = ConversionSpecifier::PrintErrno; break;
}
FS.setConversionSpecifier(ConversionSpecifier(conversionPosition, k));
ConversionSpecifier CS(conversionPosition, k);
FS.setConversionSpecifier(CS);
if (CS.consumesDataArgument())
FS.setArgIndex(argIndex++);
if (k == ConversionSpecifier::InvalidSpecifier) {
H.HandleInvalidConversionSpecifier(FS, Beg, I - Beg);
return false; // Keep processing format specifiers.
// Assume the conversion takes one argument.
return !H.HandleInvalidConversionSpecifier(FS, Beg, I - Beg);
}
return FormatSpecifierResult(Start, FS);
}
bool clang::analyze_printf::ParseFormatString(FormatStringHandler &H,
const char *I, const char *E) {
unsigned argIndex = 0;
// Keep looking for a format specifier until we have exhausted the string.
while (I != E) {
const FormatSpecifierResult &FSR = ParseFormatSpecifier(H, I, E);
const FormatSpecifierResult &FSR = ParseFormatSpecifier(H, I, E, argIndex);
// Did a fail-stop error of any kind occur when parsing the specifier?
// If so, don't do any more processing.
if (FSR.shouldStop())
@ -430,7 +438,7 @@ ArgTypeResult FormatSpecifier::getArgType(ASTContext &Ctx) const {
return Ctx.LongDoubleTy;
return Ctx.DoubleTy;
}
switch (CS.getKind()) {
case ConversionSpecifier::CStrArg:
return ArgTypeResult(LM == AsWideChar ? ArgTypeResult::WCStrTy : ArgTypeResult::CStrTy);
@ -438,11 +446,11 @@ ArgTypeResult FormatSpecifier::getArgType(ASTContext &Ctx) const {
// FIXME: This appears to be Mac OS X specific.
return ArgTypeResult::WCStrTy;
case ConversionSpecifier::CArg:
return Ctx.WCharTy;
return Ctx.WCharTy;
default:
break;
}
// FIXME: Handle other cases.
return ArgTypeResult();
}

View File

@ -955,6 +955,7 @@ Sema::CheckNonNullArguments(const NonNullAttr *NonNull,
/// FormatGuard: Automatic Protection From printf Format String
/// Vulnerabilities, Proceedings of the 10th USENIX Security Symposium, 2001.
///
/// TODO:
/// Functionality implemented:
///
/// We can statically check the following properties for string
@ -965,7 +966,7 @@ Sema::CheckNonNullArguments(const NonNullAttr *NonNull,
/// data arguments?
///
/// (2) Does each format conversion correctly match the type of the
/// corresponding data argument? (TODO)
/// corresponding data argument?
///
/// Moreover, for all printf functions we can:
///
@ -984,7 +985,6 @@ Sema::CheckNonNullArguments(const NonNullAttr *NonNull,
///
/// All of these checks can be done by parsing the format string.
///
/// For now, we ONLY do (1), (3), (5), (6), (7), and (8).
void
Sema::CheckPrintfArguments(const CallExpr *TheCall, bool HasVAListArg,
unsigned format_idx, unsigned firstDataArg) {
@ -1044,13 +1044,13 @@ class CheckPrintfHandler : public analyze_printf::FormatStringHandler {
Sema &S;
const StringLiteral *FExpr;
const Expr *OrigFormatExpr;
unsigned NumConversions;
const unsigned NumDataArgs;
const bool IsObjCLiteral;
const char *Beg; // Start of format string.
const bool HasVAListArg;
const CallExpr *TheCall;
unsigned FormatIdx;
llvm::BitVector CoveredArgs;
public:
CheckPrintfHandler(Sema &s, const StringLiteral *fexpr,
const Expr *origFormatExpr,
@ -1058,17 +1058,20 @@ public:
const char *beg, bool hasVAListArg,
const CallExpr *theCall, unsigned formatIdx)
: S(s), FExpr(fexpr), OrigFormatExpr(origFormatExpr),
NumConversions(0), NumDataArgs(numDataArgs),
NumDataArgs(numDataArgs),
IsObjCLiteral(isObjCLiteral), Beg(beg),
HasVAListArg(hasVAListArg),
TheCall(theCall), FormatIdx(formatIdx) {}
TheCall(theCall), FormatIdx(formatIdx) {
CoveredArgs.resize(numDataArgs);
CoveredArgs.reset();
}
void DoneProcessing();
void HandleIncompleteFormatSpecifier(const char *startSpecifier,
unsigned specifierLen);
void
bool
HandleInvalidConversionSpecifier(const analyze_printf::FormatSpecifier &FS,
const char *startSpecifier,
unsigned specifierLen);
@ -1117,18 +1120,35 @@ HandleIncompleteFormatSpecifier(const char *startSpecifier,
<< getFormatSpecifierRange(startSpecifier, specifierLen);
}
void CheckPrintfHandler::
bool CheckPrintfHandler::
HandleInvalidConversionSpecifier(const analyze_printf::FormatSpecifier &FS,
const char *startSpecifier,
unsigned specifierLen) {
++NumConversions;
unsigned argIndex = FS.getArgIndex();
bool keepGoing = true;
if (argIndex < NumDataArgs) {
// Consider the argument coverered, even though the specifier doesn't
// make sense.
CoveredArgs.set(argIndex);
}
else {
// If argIndex exceeds the number of data arguments we
// don't issue a warning because that is just a cascade of warnings (and
// they may have intended '%%' anyway). We don't want to continue processing
// the format string after this point, however, as we will like just get
// gibberish when trying to match arguments.
keepGoing = false;
}
const analyze_printf::ConversionSpecifier &CS =
FS.getConversionSpecifier();
SourceLocation Loc = getLocationOfByte(CS.getStart());
S.Diag(Loc, diag::warn_printf_invalid_conversion)
<< llvm::StringRef(CS.getStart(), CS.getLength())
<< getFormatSpecifierRange(startSpecifier, specifierLen);
return keepGoing;
}
void CheckPrintfHandler::HandleNullChar(const char *nullCharacter) {
@ -1139,7 +1159,7 @@ void CheckPrintfHandler::HandleNullChar(const char *nullCharacter) {
}
const Expr *CheckPrintfHandler::getDataArg(unsigned i) const {
return TheCall->getArg(FormatIdx + i);
return TheCall->getArg(FormatIdx + i + 1);
}
@ -1162,9 +1182,9 @@ CheckPrintfHandler::HandleAmount(const analyze_printf::OptionalAmount &Amt,
unsigned specifierLen) {
if (Amt.hasDataArgument()) {
++NumConversions;
if (!HasVAListArg) {
if (NumConversions > NumDataArgs) {
unsigned argIndex = Amt.getArgIndex();
if (argIndex >= NumDataArgs) {
S.Diag(getLocationOfByte(Amt.getStart()), MissingArgDiag)
<< getFormatSpecifierRange(startSpecifier, specifierLen);
// Don't do any more checking. We will just emit
@ -1176,7 +1196,8 @@ CheckPrintfHandler::HandleAmount(const analyze_printf::OptionalAmount &Amt,
// Although not in conformance with C99, we also allow the argument to be
// an 'unsigned int' as that is a reasonably safe case. GCC also
// doesn't emit a warning for that case.
const Expr *Arg = getDataArg(NumConversions);
CoveredArgs.set(argIndex);
const Expr *Arg = getDataArg(argIndex);
QualType T = Arg->getType();
const analyze_printf::ArgTypeResult &ATR = Amt.getArgType(S.Context);
@ -1221,22 +1242,21 @@ CheckPrintfHandler::HandleFormatSpecifier(const analyze_printf::FormatSpecifier
return false;
}
// Check for using an Objective-C specific conversion specifier
// in a non-ObjC literal.
if (!IsObjCLiteral && CS.isObjCArg()) {
HandleInvalidConversionSpecifier(FS, startSpecifier, specifierLen);
// Continue checking the other format specifiers.
return true;
}
if (!CS.consumesDataArgument()) {
// FIXME: Technically specifying a precision or field width here
// makes no sense. Worth issuing a warning at some point.
return true;
}
++NumConversions;
// Consume the argument.
unsigned argIndex = FS.getArgIndex();
CoveredArgs.set(argIndex);
// Check for using an Objective-C specific conversion specifier
// in a non-ObjC literal.
if (!IsObjCLiteral && CS.isObjCArg()) {
return HandleInvalidConversionSpecifier(FS, startSpecifier, specifierLen);
}
// Are we using '%n'? Issue a warning about this being
// a possible security issue.
@ -1270,7 +1290,7 @@ CheckPrintfHandler::HandleFormatSpecifier(const analyze_printf::FormatSpecifier
if (HasVAListArg)
return true;
if (NumConversions > NumDataArgs) {
if (argIndex >= NumDataArgs) {
S.Diag(getLocationOfByte(CS.getStart()),
diag::warn_printf_insufficient_data_args)
<< getFormatSpecifierRange(startSpecifier, specifierLen);
@ -1280,7 +1300,7 @@ CheckPrintfHandler::HandleFormatSpecifier(const analyze_printf::FormatSpecifier
// Now type check the data expression that matches the
// format specifier.
const Expr *Ex = getDataArg(NumConversions);
const Expr *Ex = getDataArg(argIndex);
const analyze_printf::ArgTypeResult &ATR = FS.getArgType(S.Context);
if (ATR.isValid() && !ATR.matchesType(S.Context, Ex->getType())) {
// Check if we didn't match because of an implicit cast from a 'char'
@ -1304,10 +1324,17 @@ CheckPrintfHandler::HandleFormatSpecifier(const analyze_printf::FormatSpecifier
void CheckPrintfHandler::DoneProcessing() {
// Does the number of data arguments exceed the number of
// format conversions in the format string?
if (!HasVAListArg && NumConversions < NumDataArgs)
S.Diag(getDataArg(NumConversions+1)->getLocStart(),
diag::warn_printf_too_many_data_args)
<< getFormatStringRange();
if (!HasVAListArg) {
// Find any arguments that weren't covered.
CoveredArgs.flip();
signed notCoveredArg = CoveredArgs.find_first();
if (notCoveredArg >= 0) {
assert((unsigned)notCoveredArg < NumDataArgs);
S.Diag(getDataArg((unsigned) notCoveredArg)->getLocStart(),
diag::warn_printf_data_arg_not_used)
<< getFormatStringRange();
}
}
}
void Sema::CheckPrintfString(const StringLiteral *FExpr,

View File

@ -55,7 +55,7 @@ void check_conditional_literal(const char* s, int i) {
printf(i == 1 ? "yes" : "no"); // no-warning
printf(i == 0 ? (i == 1 ? "yes" : "no") : "dont know"); // no-warning
printf(i == 0 ? (i == 1 ? s : "no") : "dont know"); // expected-warning{{format string is not a string literal}}
printf("yes" ?: "no %d", 1); // expected-warning{{more data arguments than format specifiers}}
printf("yes" ?: "no %d", 1); // expected-warning{{data argument not used by format string}}
}
void check_writeback_specifier()
@ -155,7 +155,7 @@ void test10(int x, float f, int i, long long lli) {
printf("%**\n"); // expected-warning{{invalid conversion specifier '*'}}
printf("%n", &i); // expected-warning{{use of '%n' in format string discouraged (potentially insecure)}}
printf("%d%d\n", x); // expected-warning{{more '%' conversions than data arguments}}
printf("%d\n", x, x); // expected-warning{{more data arguments than format specifiers}}
printf("%d\n", x, x); // expected-warning{{data argument not used by format string}}
printf("%W%d%Z\n", x, x, x); // expected-warning{{invalid conversion specifier 'W'}} expected-warning{{invalid conversion specifier 'Z'}}
printf("%"); // expected-warning{{incomplete format specifier}}
printf("%.d", x); // no-warning