diff --git a/clang/include/clang/Analysis/Analyses/PrintfFormatString.h b/clang/include/clang/Analysis/Analyses/PrintfFormatString.h index f1e822090879..f66892f1859b 100644 --- a/clang/include/clang/Analysis/Analyses/PrintfFormatString.h +++ b/clang/include/clang/Analysis/Analyses/PrintfFormatString.h @@ -152,18 +152,21 @@ class OptionalAmount { public: enum HowSpecified { NotSpecified, Constant, Arg }; - OptionalAmount(HowSpecified h, const char *st) - : start(st), hs(h), amt(0) {} + OptionalAmount(HowSpecified h, unsigned i, const char *st) + : start(st), hs(h), amt(i) {} OptionalAmount() : start(0), hs(NotSpecified), amt(0) {} - OptionalAmount(unsigned i, const char *st) - : start(st), hs(Constant), amt(i) {} - HowSpecified getHowSpecified() const { return hs; } + bool hasDataArgument() const { return hs == Arg; } + unsigned getArgIndex() const { + assert(hasDataArgument()); + return amt; + } + unsigned getConstantAmount() const { assert(hs == Constant); return amt; @@ -188,14 +191,14 @@ class FormatSpecifier { unsigned HasSpacePrefix : 1; unsigned HasAlternativeForm : 1; unsigned HasLeadingZeroes : 1; - unsigned flags : 5; + unsigned argIndex; ConversionSpecifier CS; OptionalAmount FieldWidth; OptionalAmount Precision; public: FormatSpecifier() : LM(None), IsLeftJustified(0), HasPlusPrefix(0), HasSpacePrefix(0), - HasAlternativeForm(0), HasLeadingZeroes(0) {} + HasAlternativeForm(0), HasLeadingZeroes(0), argIndex(0) {} static FormatSpecifier Parse(const char *beg, const char *end); @@ -212,6 +215,16 @@ public: void setHasAlternativeForm() { HasAlternativeForm = 1; } void setHasLeadingZeros() { HasLeadingZeroes = 1; } + void setArgIndex(unsigned i) { + assert(CS.consumesDataArgument()); + argIndex = i; + } + + unsigned getArgIndex() const { + assert(CS.consumesDataArgument()); + return argIndex; + } + // Methods for querying the format specifier. const ConversionSpecifier &getConversionSpecifier() const { @@ -262,10 +275,10 @@ public: virtual void HandleNullChar(const char *nullCharacter) {} - virtual void + virtual bool HandleInvalidConversionSpecifier(const analyze_printf::FormatSpecifier &FS, const char *startSpecifier, - unsigned specifierLen) {} + unsigned specifierLen) { return true; } virtual bool HandleFormatSpecifier(const analyze_printf::FormatSpecifier &FS, const char *startSpecifier, diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 52fbbcbb00a7..3d9253db334a 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -2521,8 +2521,8 @@ def warn_printf_write_back : Warning< InGroup; def warn_printf_insufficient_data_args : Warning< "more '%%' conversions than data arguments">, InGroup; -def warn_printf_too_many_data_args : Warning< - "more data arguments than format specifiers">, InGroup; +def warn_printf_data_arg_not_used : Warning< + "data argument not used by format string">, InGroup; def warn_printf_invalid_conversion : Warning< "invalid conversion specifier '%0'">, InGroup; def warn_printf_incomplete_specifier : Warning< diff --git a/clang/lib/Analysis/PrintfFormatString.cpp b/clang/lib/Analysis/PrintfFormatString.cpp index 3aee57cb8196..9c0f8139640c 100644 --- a/clang/lib/Analysis/PrintfFormatString.cpp +++ b/clang/lib/Analysis/PrintfFormatString.cpp @@ -62,7 +62,8 @@ public: // Methods for parsing format strings. //===----------------------------------------------------------------------===// -static OptionalAmount ParseAmount(const char *&Beg, const char *E) { +static OptionalAmount ParseAmount(const char *&Beg, const char *E, + unsigned &argIndex) { const char *I = Beg; UpdateOnReturn UpdateBeg(Beg, I); @@ -78,11 +79,11 @@ static OptionalAmount ParseAmount(const char *&Beg, const char *E) { } if (foundDigits) - return OptionalAmount(accumulator, Beg); + return OptionalAmount(OptionalAmount::Constant, accumulator, Beg); if (c == '*') { ++I; - return OptionalAmount(OptionalAmount::Arg, Beg); + return OptionalAmount(OptionalAmount::Arg, argIndex++, Beg); } break; @@ -93,7 +94,8 @@ static OptionalAmount ParseAmount(const char *&Beg, const char *E) { static FormatSpecifierResult ParseFormatSpecifier(FormatStringHandler &H, const char *&Beg, - const char *E) { + const char *E, + unsigned &argIndex) { using namespace clang::analyze_printf; @@ -149,7 +151,7 @@ static FormatSpecifierResult ParseFormatSpecifier(FormatStringHandler &H, } // Look for the field width (if any). - FS.setFieldWidth(ParseAmount(I, E)); + FS.setFieldWidth(ParseAmount(I, E, argIndex)); if (I == E) { // No more characters left? @@ -165,7 +167,7 @@ static FormatSpecifierResult ParseFormatSpecifier(FormatStringHandler &H, return true; } - FS.setPrecision(ParseAmount(I, E)); + FS.setPrecision(ParseAmount(I, E, argIndex)); if (I == E) { // No more characters left? @@ -241,20 +243,26 @@ static FormatSpecifierResult ParseFormatSpecifier(FormatStringHandler &H, // Glibc specific. case 'm': k = ConversionSpecifier::PrintErrno; break; } - FS.setConversionSpecifier(ConversionSpecifier(conversionPosition, k)); + ConversionSpecifier CS(conversionPosition, k); + FS.setConversionSpecifier(CS); + if (CS.consumesDataArgument()) + FS.setArgIndex(argIndex++); if (k == ConversionSpecifier::InvalidSpecifier) { - H.HandleInvalidConversionSpecifier(FS, Beg, I - Beg); - return false; // Keep processing format specifiers. + // Assume the conversion takes one argument. + return !H.HandleInvalidConversionSpecifier(FS, Beg, I - Beg); } return FormatSpecifierResult(Start, FS); } bool clang::analyze_printf::ParseFormatString(FormatStringHandler &H, const char *I, const char *E) { + + unsigned argIndex = 0; + // Keep looking for a format specifier until we have exhausted the string. while (I != E) { - const FormatSpecifierResult &FSR = ParseFormatSpecifier(H, I, E); + const FormatSpecifierResult &FSR = ParseFormatSpecifier(H, I, E, argIndex); // Did a fail-stop error of any kind occur when parsing the specifier? // If so, don't do any more processing. if (FSR.shouldStop()) @@ -430,7 +438,7 @@ ArgTypeResult FormatSpecifier::getArgType(ASTContext &Ctx) const { return Ctx.LongDoubleTy; return Ctx.DoubleTy; } - + switch (CS.getKind()) { case ConversionSpecifier::CStrArg: return ArgTypeResult(LM == AsWideChar ? ArgTypeResult::WCStrTy : ArgTypeResult::CStrTy); @@ -438,11 +446,11 @@ ArgTypeResult FormatSpecifier::getArgType(ASTContext &Ctx) const { // FIXME: This appears to be Mac OS X specific. return ArgTypeResult::WCStrTy; case ConversionSpecifier::CArg: - return Ctx.WCharTy; + return Ctx.WCharTy; default: break; } - + // FIXME: Handle other cases. return ArgTypeResult(); } diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index 019b79811d12..2f71c778ecf8 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -955,6 +955,7 @@ Sema::CheckNonNullArguments(const NonNullAttr *NonNull, /// FormatGuard: Automatic Protection From printf Format String /// Vulnerabilities, Proceedings of the 10th USENIX Security Symposium, 2001. /// +/// TODO: /// Functionality implemented: /// /// We can statically check the following properties for string @@ -965,7 +966,7 @@ Sema::CheckNonNullArguments(const NonNullAttr *NonNull, /// data arguments? /// /// (2) Does each format conversion correctly match the type of the -/// corresponding data argument? (TODO) +/// corresponding data argument? /// /// Moreover, for all printf functions we can: /// @@ -984,7 +985,6 @@ Sema::CheckNonNullArguments(const NonNullAttr *NonNull, /// /// All of these checks can be done by parsing the format string. /// -/// For now, we ONLY do (1), (3), (5), (6), (7), and (8). void Sema::CheckPrintfArguments(const CallExpr *TheCall, bool HasVAListArg, unsigned format_idx, unsigned firstDataArg) { @@ -1044,13 +1044,13 @@ class CheckPrintfHandler : public analyze_printf::FormatStringHandler { Sema &S; const StringLiteral *FExpr; const Expr *OrigFormatExpr; - unsigned NumConversions; const unsigned NumDataArgs; const bool IsObjCLiteral; const char *Beg; // Start of format string. const bool HasVAListArg; const CallExpr *TheCall; unsigned FormatIdx; + llvm::BitVector CoveredArgs; public: CheckPrintfHandler(Sema &s, const StringLiteral *fexpr, const Expr *origFormatExpr, @@ -1058,17 +1058,20 @@ public: const char *beg, bool hasVAListArg, const CallExpr *theCall, unsigned formatIdx) : S(s), FExpr(fexpr), OrigFormatExpr(origFormatExpr), - NumConversions(0), NumDataArgs(numDataArgs), + NumDataArgs(numDataArgs), IsObjCLiteral(isObjCLiteral), Beg(beg), HasVAListArg(hasVAListArg), - TheCall(theCall), FormatIdx(formatIdx) {} + TheCall(theCall), FormatIdx(formatIdx) { + CoveredArgs.resize(numDataArgs); + CoveredArgs.reset(); + } void DoneProcessing(); void HandleIncompleteFormatSpecifier(const char *startSpecifier, unsigned specifierLen); - void + bool HandleInvalidConversionSpecifier(const analyze_printf::FormatSpecifier &FS, const char *startSpecifier, unsigned specifierLen); @@ -1117,18 +1120,35 @@ HandleIncompleteFormatSpecifier(const char *startSpecifier, << getFormatSpecifierRange(startSpecifier, specifierLen); } -void CheckPrintfHandler:: +bool CheckPrintfHandler:: HandleInvalidConversionSpecifier(const analyze_printf::FormatSpecifier &FS, const char *startSpecifier, unsigned specifierLen) { - ++NumConversions; + unsigned argIndex = FS.getArgIndex(); + bool keepGoing = true; + if (argIndex < NumDataArgs) { + // Consider the argument coverered, even though the specifier doesn't + // make sense. + CoveredArgs.set(argIndex); + } + else { + // If argIndex exceeds the number of data arguments we + // don't issue a warning because that is just a cascade of warnings (and + // they may have intended '%%' anyway). We don't want to continue processing + // the format string after this point, however, as we will like just get + // gibberish when trying to match arguments. + keepGoing = false; + } + const analyze_printf::ConversionSpecifier &CS = FS.getConversionSpecifier(); SourceLocation Loc = getLocationOfByte(CS.getStart()); S.Diag(Loc, diag::warn_printf_invalid_conversion) << llvm::StringRef(CS.getStart(), CS.getLength()) << getFormatSpecifierRange(startSpecifier, specifierLen); + + return keepGoing; } void CheckPrintfHandler::HandleNullChar(const char *nullCharacter) { @@ -1139,7 +1159,7 @@ void CheckPrintfHandler::HandleNullChar(const char *nullCharacter) { } const Expr *CheckPrintfHandler::getDataArg(unsigned i) const { - return TheCall->getArg(FormatIdx + i); + return TheCall->getArg(FormatIdx + i + 1); } @@ -1162,9 +1182,9 @@ CheckPrintfHandler::HandleAmount(const analyze_printf::OptionalAmount &Amt, unsigned specifierLen) { if (Amt.hasDataArgument()) { - ++NumConversions; if (!HasVAListArg) { - if (NumConversions > NumDataArgs) { + unsigned argIndex = Amt.getArgIndex(); + if (argIndex >= NumDataArgs) { S.Diag(getLocationOfByte(Amt.getStart()), MissingArgDiag) << getFormatSpecifierRange(startSpecifier, specifierLen); // Don't do any more checking. We will just emit @@ -1176,7 +1196,8 @@ CheckPrintfHandler::HandleAmount(const analyze_printf::OptionalAmount &Amt, // Although not in conformance with C99, we also allow the argument to be // an 'unsigned int' as that is a reasonably safe case. GCC also // doesn't emit a warning for that case. - const Expr *Arg = getDataArg(NumConversions); + CoveredArgs.set(argIndex); + const Expr *Arg = getDataArg(argIndex); QualType T = Arg->getType(); const analyze_printf::ArgTypeResult &ATR = Amt.getArgType(S.Context); @@ -1221,22 +1242,21 @@ CheckPrintfHandler::HandleFormatSpecifier(const analyze_printf::FormatSpecifier return false; } - // Check for using an Objective-C specific conversion specifier - // in a non-ObjC literal. - if (!IsObjCLiteral && CS.isObjCArg()) { - HandleInvalidConversionSpecifier(FS, startSpecifier, specifierLen); - - // Continue checking the other format specifiers. - return true; - } - if (!CS.consumesDataArgument()) { // FIXME: Technically specifying a precision or field width here // makes no sense. Worth issuing a warning at some point. return true; } - ++NumConversions; + // Consume the argument. + unsigned argIndex = FS.getArgIndex(); + CoveredArgs.set(argIndex); + + // Check for using an Objective-C specific conversion specifier + // in a non-ObjC literal. + if (!IsObjCLiteral && CS.isObjCArg()) { + return HandleInvalidConversionSpecifier(FS, startSpecifier, specifierLen); + } // Are we using '%n'? Issue a warning about this being // a possible security issue. @@ -1270,7 +1290,7 @@ CheckPrintfHandler::HandleFormatSpecifier(const analyze_printf::FormatSpecifier if (HasVAListArg) return true; - if (NumConversions > NumDataArgs) { + if (argIndex >= NumDataArgs) { S.Diag(getLocationOfByte(CS.getStart()), diag::warn_printf_insufficient_data_args) << getFormatSpecifierRange(startSpecifier, specifierLen); @@ -1280,7 +1300,7 @@ CheckPrintfHandler::HandleFormatSpecifier(const analyze_printf::FormatSpecifier // Now type check the data expression that matches the // format specifier. - const Expr *Ex = getDataArg(NumConversions); + const Expr *Ex = getDataArg(argIndex); const analyze_printf::ArgTypeResult &ATR = FS.getArgType(S.Context); if (ATR.isValid() && !ATR.matchesType(S.Context, Ex->getType())) { // Check if we didn't match because of an implicit cast from a 'char' @@ -1304,10 +1324,17 @@ CheckPrintfHandler::HandleFormatSpecifier(const analyze_printf::FormatSpecifier void CheckPrintfHandler::DoneProcessing() { // Does the number of data arguments exceed the number of // format conversions in the format string? - if (!HasVAListArg && NumConversions < NumDataArgs) - S.Diag(getDataArg(NumConversions+1)->getLocStart(), - diag::warn_printf_too_many_data_args) - << getFormatStringRange(); + if (!HasVAListArg) { + // Find any arguments that weren't covered. + CoveredArgs.flip(); + signed notCoveredArg = CoveredArgs.find_first(); + if (notCoveredArg >= 0) { + assert((unsigned)notCoveredArg < NumDataArgs); + S.Diag(getDataArg((unsigned) notCoveredArg)->getLocStart(), + diag::warn_printf_data_arg_not_used) + << getFormatStringRange(); + } + } } void Sema::CheckPrintfString(const StringLiteral *FExpr, diff --git a/clang/test/Sema/format-strings.c b/clang/test/Sema/format-strings.c index e2686a885e0b..191996004dd5 100644 --- a/clang/test/Sema/format-strings.c +++ b/clang/test/Sema/format-strings.c @@ -55,7 +55,7 @@ void check_conditional_literal(const char* s, int i) { printf(i == 1 ? "yes" : "no"); // no-warning printf(i == 0 ? (i == 1 ? "yes" : "no") : "dont know"); // no-warning printf(i == 0 ? (i == 1 ? s : "no") : "dont know"); // expected-warning{{format string is not a string literal}} - printf("yes" ?: "no %d", 1); // expected-warning{{more data arguments than format specifiers}} + printf("yes" ?: "no %d", 1); // expected-warning{{data argument not used by format string}} } void check_writeback_specifier() @@ -155,7 +155,7 @@ void test10(int x, float f, int i, long long lli) { printf("%**\n"); // expected-warning{{invalid conversion specifier '*'}} printf("%n", &i); // expected-warning{{use of '%n' in format string discouraged (potentially insecure)}} printf("%d%d\n", x); // expected-warning{{more '%' conversions than data arguments}} - printf("%d\n", x, x); // expected-warning{{more data arguments than format specifiers}} + printf("%d\n", x, x); // expected-warning{{data argument not used by format string}} printf("%W%d%Z\n", x, x, x); // expected-warning{{invalid conversion specifier 'W'}} expected-warning{{invalid conversion specifier 'Z'}} printf("%"); // expected-warning{{incomplete format specifier}} printf("%.d", x); // no-warning