forked from OSchip/llvm-project
[Sema] Handle UTF-8 invalid format string specifiers
Improve invalid format string specifier handling by printing out invalid specifiers characters with \x, \u and \U. Previously clang would print gargabe whenever the character is unprintable. Example, before: NSLog(@"%\u25B9"); => warning: invalid conversion specifier ' [-Wformat-invalid-specifier] after: NSLog(@"%\u25B9"); => warning: invalid conversion specifier '\u25b9' [-Wformat-invalid-specifier] Differential Revision: http://reviews.llvm.org/D18296 rdar://problem/24672159 llvm-svn: 264752
This commit is contained in:
parent
ac400900da
commit
0c18d03d91
|
@ -210,6 +210,7 @@ public:
|
|||
unsigned getLength() const {
|
||||
return EndScanList ? EndScanList - Position : 1;
|
||||
}
|
||||
void setEndScanList(const char *pos) { EndScanList = pos; }
|
||||
|
||||
bool isIntArg() const { return (kind >= IntArgBeg && kind <= IntArgEnd) ||
|
||||
kind == FreeBSDrArg || kind == FreeBSDyArg; }
|
||||
|
@ -413,11 +414,6 @@ public:
|
|||
bool isObjCArg() const { return kind >= ObjCBeg && kind <= ObjCEnd; }
|
||||
bool isDoubleArg() const { return kind >= DoubleArgBeg &&
|
||||
kind <= DoubleArgEnd; }
|
||||
unsigned getLength() const {
|
||||
// Conversion specifiers currently only are represented by
|
||||
// single characters, but we be flexible.
|
||||
return 1;
|
||||
}
|
||||
|
||||
static bool classof(const analyze_format_string::ConversionSpecifier *CS) {
|
||||
return CS->isPrintfKind();
|
||||
|
@ -546,8 +542,6 @@ public:
|
|||
ScanfConversionSpecifier(const char *pos, Kind k)
|
||||
: ConversionSpecifier(false, pos, k) {}
|
||||
|
||||
void setEndScanList(const char *pos) { EndScanList = pos; }
|
||||
|
||||
static bool classof(const analyze_format_string::ConversionSpecifier *CS) {
|
||||
return !CS->isPrintfKind();
|
||||
}
|
||||
|
|
|
@ -15,6 +15,7 @@
|
|||
#include "FormatStringParsing.h"
|
||||
#include "clang/Basic/LangOptions.h"
|
||||
#include "clang/Basic/TargetInfo.h"
|
||||
#include "llvm/Support/ConvertUTF.h"
|
||||
|
||||
using clang::analyze_format_string::ArgType;
|
||||
using clang::analyze_format_string::FormatStringHandler;
|
||||
|
@ -260,6 +261,28 @@ clang::analyze_format_string::ParseLengthModifier(FormatSpecifier &FS,
|
|||
return true;
|
||||
}
|
||||
|
||||
bool clang::analyze_format_string::ParseUTF8InvalidSpecifier(
|
||||
const char *SpecifierBegin, const char *FmtStrEnd, unsigned &Len) {
|
||||
if (SpecifierBegin + 1 >= FmtStrEnd)
|
||||
return false;
|
||||
|
||||
const UTF8 *SB = reinterpret_cast<const UTF8 *>(SpecifierBegin + 1);
|
||||
const UTF8 *SE = reinterpret_cast<const UTF8 *>(FmtStrEnd);
|
||||
const char FirstByte = *SB;
|
||||
|
||||
// If the invalid specifier is a multibyte UTF-8 string, return the
|
||||
// total length accordingly so that the conversion specifier can be
|
||||
// properly updated to reflect a complete UTF-8 specifier.
|
||||
unsigned NumBytes = getNumBytesForUTF8(FirstByte);
|
||||
if (NumBytes == 1)
|
||||
return false;
|
||||
if (SB + NumBytes > SE)
|
||||
return false;
|
||||
|
||||
Len = NumBytes + 1;
|
||||
return true;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Methods on ArgType.
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
|
|
@ -46,7 +46,13 @@ bool ParseArgPosition(FormatStringHandler &H,
|
|||
/// FormatSpecifier& argument, and false otherwise.
|
||||
bool ParseLengthModifier(FormatSpecifier &FS, const char *&Beg, const char *E,
|
||||
const LangOptions &LO, bool IsScanf = false);
|
||||
|
||||
|
||||
/// Returns true if the invalid specifier in \p SpecifierBegin is a UTF-8
|
||||
/// string; check that it won't go further than \p FmtStrEnd and write
|
||||
/// up the total size in \p Len.
|
||||
bool ParseUTF8InvalidSpecifier(const char *SpecifierBegin,
|
||||
const char *FmtStrEnd, unsigned &Len);
|
||||
|
||||
template <typename T> class SpecifierResult {
|
||||
T FS;
|
||||
const char *Start;
|
||||
|
|
|
@ -312,8 +312,13 @@ static PrintfSpecifierResult ParsePrintfSpecifier(FormatStringHandler &H,
|
|||
argIndex++;
|
||||
|
||||
if (k == ConversionSpecifier::InvalidSpecifier) {
|
||||
unsigned Len = I - Start;
|
||||
if (ParseUTF8InvalidSpecifier(Start, E, Len)) {
|
||||
CS.setEndScanList(Start + Len);
|
||||
FS.setConversionSpecifier(CS);
|
||||
}
|
||||
// Assume the conversion takes one argument.
|
||||
return !H.HandleInvalidPrintfConversionSpecifier(FS, Start, I - Start);
|
||||
return !H.HandleInvalidPrintfConversionSpecifier(FS, Start, Len);
|
||||
}
|
||||
return PrintfSpecifierResult(Start, FS);
|
||||
}
|
||||
|
|
|
@ -79,7 +79,7 @@ static ScanfSpecifierResult ParseScanfSpecifier(FormatStringHandler &H,
|
|||
unsigned &argIndex,
|
||||
const LangOptions &LO,
|
||||
const TargetInfo &Target) {
|
||||
|
||||
using namespace clang::analyze_format_string;
|
||||
using namespace clang::analyze_scanf;
|
||||
const char *I = Beg;
|
||||
const char *Start = nullptr;
|
||||
|
@ -210,10 +210,15 @@ static ScanfSpecifierResult ParseScanfSpecifier(FormatStringHandler &H,
|
|||
|
||||
// FIXME: '%' and '*' doesn't make sense. Issue a warning.
|
||||
// FIXME: 'ConsumedSoFar' and '*' doesn't make sense.
|
||||
|
||||
|
||||
if (k == ScanfConversionSpecifier::InvalidSpecifier) {
|
||||
unsigned Len = I - Beg;
|
||||
if (ParseUTF8InvalidSpecifier(Beg, E, Len)) {
|
||||
CS.setEndScanList(Beg + Len);
|
||||
FS.setConversionSpecifier(CS);
|
||||
}
|
||||
// Assume the conversion takes one argument.
|
||||
return !H.HandleInvalidScanfConversionSpecifier(FS, Beg, I - Beg);
|
||||
return !H.HandleInvalidScanfConversionSpecifier(FS, Beg, Len);
|
||||
}
|
||||
return ScanfSpecifierResult(Start, FS);
|
||||
}
|
||||
|
|
|
@ -36,6 +36,8 @@
|
|||
#include "llvm/ADT/STLExtras.h"
|
||||
#include "llvm/ADT/SmallBitVector.h"
|
||||
#include "llvm/ADT/SmallString.h"
|
||||
#include "llvm/Support/Format.h"
|
||||
#include "llvm/Support/Locale.h"
|
||||
#include "llvm/Support/ConvertUTF.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
#include <limits>
|
||||
|
@ -3976,12 +3978,41 @@ CheckFormatHandler::HandleInvalidConversionSpecifier(unsigned argIndex,
|
|||
// gibberish when trying to match arguments.
|
||||
keepGoing = false;
|
||||
}
|
||||
|
||||
EmitFormatDiagnostic(S.PDiag(diag::warn_format_invalid_conversion)
|
||||
<< StringRef(csStart, csLen),
|
||||
Loc, /*IsStringLocation*/true,
|
||||
getSpecifierRange(startSpec, specifierLen));
|
||||
|
||||
|
||||
StringRef Specifier(csStart, csLen);
|
||||
|
||||
// If the specifier in non-printable, it could be the first byte of a UTF-8
|
||||
// sequence. In that case, print the UTF-8 code point. If not, print the byte
|
||||
// hex value.
|
||||
std::string CodePointStr;
|
||||
if (!llvm::sys::locale::isPrint(*csStart)) {
|
||||
UTF32 CodePoint;
|
||||
const UTF8 **B = reinterpret_cast<const UTF8 **>(&csStart);
|
||||
const UTF8 *E =
|
||||
reinterpret_cast<const UTF8 *>(csStart + csLen);
|
||||
ConversionResult Result =
|
||||
llvm::convertUTF8Sequence(B, E, &CodePoint, strictConversion);
|
||||
|
||||
if (Result != conversionOK) {
|
||||
unsigned char FirstChar = *csStart;
|
||||
CodePoint = (UTF32)FirstChar;
|
||||
}
|
||||
|
||||
llvm::raw_string_ostream OS(CodePointStr);
|
||||
if (CodePoint < 256)
|
||||
OS << "\\x" << llvm::format("%02x", CodePoint);
|
||||
else if (CodePoint <= 0xFFFF)
|
||||
OS << "\\u" << llvm::format("%04x", CodePoint);
|
||||
else
|
||||
OS << "\\U" << llvm::format("%08x", CodePoint);
|
||||
OS.flush();
|
||||
Specifier = CodePointStr;
|
||||
}
|
||||
|
||||
EmitFormatDiagnostic(
|
||||
S.PDiag(diag::warn_format_invalid_conversion) << Specifier, Loc,
|
||||
/*IsStringLocation*/ true, getSpecifierRange(startSpec, specifierLen));
|
||||
|
||||
return keepGoing;
|
||||
}
|
||||
|
||||
|
|
|
@ -183,3 +183,11 @@ void check_conditional_literal(char *s, int *i) {
|
|||
scanf(i ? "%d" : "%d", i, s); // expected-warning{{data argument not used}}
|
||||
scanf(i ? "%s" : "%d", s); // expected-warning{{format specifies type 'int *'}}
|
||||
}
|
||||
|
||||
void testInvalidNoPrintable(int *a) {
|
||||
scanf("%\u25B9", a); // expected-warning {{invalid conversion specifier '\u25b9'}}
|
||||
scanf("%\xE2\x96\xB9", a); // expected-warning {{invalid conversion specifier '\u25b9'}}
|
||||
scanf("%\U00010348", a); // expected-warning {{invalid conversion specifier '\U00010348'}}
|
||||
scanf("%\xF0\x90\x8D\x88", a); // expected-warning {{invalid conversion specifier '\U00010348'}}
|
||||
scanf("%\xe2", a); // expected-warning {{invalid conversion specifier '\xe2'}}
|
||||
}
|
||||
|
|
|
@ -642,6 +642,14 @@ void test_qualifiers(volatile int *vip, const int *cip,
|
|||
printf("%n", (cip_t)0); // expected-warning{{format specifies type 'int *' but the argument has type 'cip_t' (aka 'const int *')}}
|
||||
}
|
||||
|
||||
void testInvalidNoPrintable() {
|
||||
printf("%\u25B9"); // expected-warning {{invalid conversion specifier '\u25b9'}}
|
||||
printf("%\xE2\x96\xB9"); // expected-warning {{invalid conversion specifier '\u25b9'}}
|
||||
printf("%\U00010348"); // expected-warning {{invalid conversion specifier '\U00010348'}}
|
||||
printf("%\xF0\x90\x8D\x88"); // expected-warning {{invalid conversion specifier '\U00010348'}}
|
||||
printf("%\xe2"); // expected-warning {{invalid conversion specifier '\xe2'}}
|
||||
}
|
||||
|
||||
#pragma GCC diagnostic ignored "-Wformat-nonliteral"
|
||||
#pragma GCC diagnostic warning "-Wformat-security"
|
||||
// <rdar://problem/14178260>
|
||||
|
|
|
@ -265,3 +265,11 @@ void testObjCModifierFlags() {
|
|||
NSLog(@"%2$[tt]@ %1$[tt]s", @"Foo", @"Bar"); // expected-warning {{object format flags cannot be used with 's' conversion specifier}}
|
||||
}
|
||||
|
||||
// Test Objective-C invalid no printable specifiers
|
||||
void testObjcInvalidNoPrintable(int *a) {
|
||||
NSLog(@"%\u25B9"); // expected-warning {{invalid conversion specifier '\u25b9'}}
|
||||
NSLog(@"%\xE2\x96\xB9"); // expected-warning {{invalid conversion specifier '\u25b9'}}
|
||||
NSLog(@"%\U00010348"); // expected-warning {{invalid conversion specifier '\U00010348'}}
|
||||
NSLog(@"%\xF0\x90\x8D\x88"); // expected-warning {{invalid conversion specifier '\U00010348'}}
|
||||
NSLog(@"%\xe2"); // expected-warning {{input conversion stopped}} expected-warning {{invalid conversion specifier '\xe2'}}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue