forked from OSchip/llvm-project
Start improving diagnostics that relate to subcharacters of string literals.
First step: handle diagnostics in StringLiterals that are due to token pasting. For example, we now handle:

  id str2 = @"foo" "bar" @"baz" " b\0larg"; // expected-warning {{literal contains NUL character}}

correctly, pointing the caret at the offending byte inside the right token:

  test/SemaObjC/exprs.m:17:15: warning: CFString literal contains NUL character
    " b\0larg"; // expected-warning {{literal contains NUL character}}
    ~~~^~~~~~~

There are several other related issues still to be done.

llvm-svn: 64924
This commit is contained in:
parent
6add6181c8
commit
a26fb347a0
|
@ -520,7 +520,6 @@ public:
|
||||||
typedef const SourceLocation *tokloc_iterator;
|
typedef const SourceLocation *tokloc_iterator;
|
||||||
tokloc_iterator tokloc_begin() const { return TokLocs; }
|
tokloc_iterator tokloc_begin() const { return TokLocs; }
|
||||||
tokloc_iterator tokloc_end() const { return TokLocs+NumConcatenated; }
|
tokloc_iterator tokloc_end() const { return TokLocs+NumConcatenated; }
|
||||||
|
|
||||||
|
|
||||||
virtual SourceRange getSourceRange() const {
|
virtual SourceRange getSourceRange() const {
|
||||||
return SourceRange(TokLocs[0], TokLocs[NumConcatenated-1]);
|
return SourceRange(TokLocs[0], TokLocs[NumConcatenated-1]);
|
||||||
|
|
|
@ -1989,6 +1989,8 @@ public:
|
||||||
private:
|
private:
|
||||||
Action::OwningExprResult CheckFunctionCall(FunctionDecl *FDecl,
|
Action::OwningExprResult CheckFunctionCall(FunctionDecl *FDecl,
|
||||||
CallExpr *TheCall);
|
CallExpr *TheCall);
|
||||||
|
SourceLocation getLocationOfStringLiteralByte(const StringLiteral *SL,
|
||||||
|
unsigned ByteNo) const;
|
||||||
bool CheckObjCString(Expr *Arg);
|
bool CheckObjCString(Expr *Arg);
|
||||||
bool SemaBuiltinVAStart(CallExpr *TheCall);
|
bool SemaBuiltinVAStart(CallExpr *TheCall);
|
||||||
bool SemaBuiltinUnorderedCompare(CallExpr *TheCall);
|
bool SemaBuiltinUnorderedCompare(CallExpr *TheCall);
|
||||||
|
|
|
@ -20,6 +20,71 @@
|
||||||
#include "clang/Lex/Preprocessor.h"
|
#include "clang/Lex/Preprocessor.h"
|
||||||
using namespace clang;
|
using namespace clang;
|
||||||
|
|
||||||
|
/// getLocationOfStringLiteralByte - Return a source location that points to the
|
||||||
|
/// specified byte of the specified string literal.
|
||||||
|
///
|
||||||
|
/// Strings are amazingly complex. They can be formed from multiple tokens and
|
||||||
|
/// can have escape sequences in them in addition to the usual trigraph and
|
||||||
|
/// escaped newline business. This routine handles this complexity.
|
||||||
|
///
|
||||||
|
SourceLocation Sema::getLocationOfStringLiteralByte(const StringLiteral *SL,
|
||||||
|
unsigned ByteNo) const {
|
||||||
|
assert(!SL->isWide() && "This doesn't work for wide strings yet");
|
||||||
|
|
||||||
|
// Loop over all of the tokens in this string until we find the one that
|
||||||
|
// contains the byte we're looking for.
|
||||||
|
unsigned TokNo = 0;
|
||||||
|
while (1) {
|
||||||
|
assert(TokNo < SL->getNumConcatenated() && "Invalid byte number!");
|
||||||
|
SourceLocation StrTokLoc = SL->getStrTokenLoc(TokNo);
|
||||||
|
|
||||||
|
// Get the spelling of the string so that we can get the data that makes up
|
||||||
|
// the string literal, not the identifier for the macro it is potentially
|
||||||
|
// expanded through.
|
||||||
|
SourceLocation StrTokSpellingLoc = SourceMgr.getSpellingLoc(StrTokLoc);
|
||||||
|
|
||||||
|
// Re-lex the token to get its length and original spelling.
|
||||||
|
std::pair<FileID, unsigned> LocInfo =
|
||||||
|
SourceMgr.getDecomposedLoc(StrTokSpellingLoc);
|
||||||
|
std::pair<const char *,const char *> Buffer =
|
||||||
|
SourceMgr.getBufferData(LocInfo.first);
|
||||||
|
const char *StrData = Buffer.first+LocInfo.second;
|
||||||
|
|
||||||
|
// Create a langops struct and enable trigraphs. This is sufficient for
|
||||||
|
// relexing tokens.
|
||||||
|
LangOptions LangOpts;
|
||||||
|
LangOpts.Trigraphs = true;
|
||||||
|
|
||||||
|
// Create a lexer starting at the beginning of this token.
|
||||||
|
Lexer TheLexer(StrTokSpellingLoc, LangOpts, Buffer.first, StrData,
|
||||||
|
Buffer.second);
|
||||||
|
Token TheTok;
|
||||||
|
TheLexer.LexFromRawLexer(TheTok);
|
||||||
|
|
||||||
|
// The length of the string is the token length minus the two quotes.
|
||||||
|
unsigned TokNumBytes = TheTok.getLength()-2;
|
||||||
|
|
||||||
|
// If we found the token we're looking for, return the location.
|
||||||
|
// FIXME: This should consider character escapes!
|
||||||
|
if (ByteNo < TokNumBytes ||
|
||||||
|
(ByteNo == TokNumBytes && TokNo == SL->getNumConcatenated())) {
|
||||||
|
// If the original token came from a macro expansion, just return the
|
||||||
|
// start of the token. We don't want to magically jump to the spelling
|
||||||
|
// for a diagnostic. We do the above business in case some tokens come
|
||||||
|
// from a macro expansion but others don't.
|
||||||
|
if (!StrTokLoc.isFileID()) return StrTokLoc;
|
||||||
|
|
||||||
|
// We advance +1 to step over the '"'.
|
||||||
|
return PP.AdvanceToTokenCharacter(StrTokLoc, ByteNo+1);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Move to the next string token.
|
||||||
|
++TokNo;
|
||||||
|
ByteNo -= TokNumBytes;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/// CheckFunctionCall - Check a direct function call for various correctness
|
/// CheckFunctionCall - Check a direct function call for various correctness
|
||||||
/// and safety properties not strictly enforced by the C type system.
|
/// and safety properties not strictly enforced by the C type system.
|
||||||
Action::OwningExprResult
|
Action::OwningExprResult
|
||||||
|
@ -108,14 +173,14 @@ bool Sema::CheckObjCString(Expr *Arg) {
|
||||||
|
|
||||||
for (unsigned i = 0; i < Length; ++i) {
|
for (unsigned i = 0; i < Length; ++i) {
|
||||||
if (!isascii(Data[i])) {
|
if (!isascii(Data[i])) {
|
||||||
Diag(PP.AdvanceToTokenCharacter(Arg->getLocStart(), i + 1),
|
Diag(getLocationOfStringLiteralByte(Literal, i),
|
||||||
diag::warn_cfstring_literal_contains_non_ascii_character)
|
diag::warn_cfstring_literal_contains_non_ascii_character)
|
||||||
<< Arg->getSourceRange();
|
<< Arg->getSourceRange();
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!Data[i]) {
|
if (!Data[i]) {
|
||||||
Diag(PP.AdvanceToTokenCharacter(Arg->getLocStart(), i + 1),
|
Diag(getLocationOfStringLiteralByte(Literal, i),
|
||||||
diag::warn_cfstring_literal_contains_nul_character)
|
diag::warn_cfstring_literal_contains_nul_character)
|
||||||
<< Arg->getSourceRange();
|
<< Arg->getSourceRange();
|
||||||
break;
|
break;
|
||||||
|
@ -565,7 +630,7 @@ void Sema::CheckPrintfString(StringLiteral *FExpr, Expr *OrigFormatExpr,
|
||||||
if (Str[StrIdx] == '\0') {
|
if (Str[StrIdx] == '\0') {
|
||||||
// The string returned by getStrData() is not null-terminated,
|
// The string returned by getStrData() is not null-terminated,
|
||||||
// so the presence of a null character is likely an error.
|
// so the presence of a null character is likely an error.
|
||||||
Diag(PP.AdvanceToTokenCharacter(FExpr->getLocStart(), StrIdx+1),
|
Diag(getLocationOfStringLiteralByte(FExpr, StrIdx),
|
||||||
diag::warn_printf_format_string_contains_null_char)
|
diag::warn_printf_format_string_contains_null_char)
|
||||||
<< OrigFormatExpr->getSourceRange();
|
<< OrigFormatExpr->getSourceRange();
|
||||||
return;
|
return;
|
||||||
|
@ -587,8 +652,7 @@ void Sema::CheckPrintfString(StringLiteral *FExpr, Expr *OrigFormatExpr,
|
||||||
++numConversions;
|
++numConversions;
|
||||||
|
|
||||||
if (!HasVAListArg && numConversions > numDataArgs) {
|
if (!HasVAListArg && numConversions > numDataArgs) {
|
||||||
SourceLocation Loc = FExpr->getLocStart();
|
SourceLocation Loc = getLocationOfStringLiteralByte(FExpr, StrIdx);
|
||||||
Loc = PP.AdvanceToTokenCharacter(Loc, StrIdx+1);
|
|
||||||
|
|
||||||
if (Str[StrIdx-1] == '.')
|
if (Str[StrIdx-1] == '.')
|
||||||
Diag(Loc, diag::warn_printf_asterisk_precision_missing_arg)
|
Diag(Loc, diag::warn_printf_asterisk_precision_missing_arg)
|
||||||
|
@ -607,8 +671,7 @@ void Sema::CheckPrintfString(StringLiteral *FExpr, Expr *OrigFormatExpr,
|
||||||
if (BT->getKind() == BuiltinType::Int)
|
if (BT->getKind() == BuiltinType::Int)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
SourceLocation Loc =
|
SourceLocation Loc = getLocationOfStringLiteralByte(FExpr, StrIdx);
|
||||||
PP.AdvanceToTokenCharacter(FExpr->getLocStart(), StrIdx+1);
|
|
||||||
|
|
||||||
if (Str[StrIdx-1] == '.')
|
if (Str[StrIdx-1] == '.')
|
||||||
Diag(Loc, diag::warn_printf_asterisk_precision_wrong_type)
|
Diag(Loc, diag::warn_printf_asterisk_precision_wrong_type)
|
||||||
|
@ -655,8 +718,8 @@ void Sema::CheckPrintfString(StringLiteral *FExpr, Expr *OrigFormatExpr,
|
||||||
case 'n': {
|
case 'n': {
|
||||||
++numConversions;
|
++numConversions;
|
||||||
CurrentState = state_OrdChr;
|
CurrentState = state_OrdChr;
|
||||||
SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(),
|
SourceLocation Loc = getLocationOfStringLiteralByte(FExpr,
|
||||||
LastConversionIdx+1);
|
LastConversionIdx);
|
||||||
|
|
||||||
Diag(Loc, diag::warn_printf_write_back)<<OrigFormatExpr->getSourceRange();
|
Diag(Loc, diag::warn_printf_write_back)<<OrigFormatExpr->getSourceRange();
|
||||||
break;
|
break;
|
||||||
|
@ -669,8 +732,8 @@ void Sema::CheckPrintfString(StringLiteral *FExpr, Expr *OrigFormatExpr,
|
||||||
CurrentState = state_OrdChr;
|
CurrentState = state_OrdChr;
|
||||||
else {
|
else {
|
||||||
// Issue a warning: invalid format conversion.
|
// Issue a warning: invalid format conversion.
|
||||||
SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(),
|
SourceLocation Loc =
|
||||||
LastConversionIdx+1);
|
getLocationOfStringLiteralByte(FExpr, LastConversionIdx);
|
||||||
|
|
||||||
Diag(Loc, diag::warn_printf_invalid_conversion)
|
Diag(Loc, diag::warn_printf_invalid_conversion)
|
||||||
<< std::string(Str+LastConversionIdx,
|
<< std::string(Str+LastConversionIdx,
|
||||||
|
@ -690,8 +753,8 @@ void Sema::CheckPrintfString(StringLiteral *FExpr, Expr *OrigFormatExpr,
|
||||||
CurrentState = state_OrdChr;
|
CurrentState = state_OrdChr;
|
||||||
else {
|
else {
|
||||||
// Issue a warning: invalid format conversion.
|
// Issue a warning: invalid format conversion.
|
||||||
SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(),
|
SourceLocation Loc =
|
||||||
LastConversionIdx+1);
|
getLocationOfStringLiteralByte(FExpr, LastConversionIdx);
|
||||||
|
|
||||||
Diag(Loc, diag::warn_printf_invalid_conversion)
|
Diag(Loc, diag::warn_printf_invalid_conversion)
|
||||||
<< std::string(Str+LastConversionIdx, Str+StrIdx)
|
<< std::string(Str+LastConversionIdx, Str+StrIdx)
|
||||||
|
@ -713,8 +776,8 @@ void Sema::CheckPrintfString(StringLiteral *FExpr, Expr *OrigFormatExpr,
|
||||||
|
|
||||||
if (CurrentState == state_Conversion) {
|
if (CurrentState == state_Conversion) {
|
||||||
// Issue a warning: invalid format conversion.
|
// Issue a warning: invalid format conversion.
|
||||||
SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(),
|
SourceLocation Loc =
|
||||||
LastConversionIdx+1);
|
getLocationOfStringLiteralByte(FExpr, LastConversionIdx);
|
||||||
|
|
||||||
Diag(Loc, diag::warn_printf_invalid_conversion)
|
Diag(Loc, diag::warn_printf_invalid_conversion)
|
||||||
<< std::string(Str+LastConversionIdx,
|
<< std::string(Str+LastConversionIdx,
|
||||||
|
@ -727,8 +790,8 @@ void Sema::CheckPrintfString(StringLiteral *FExpr, Expr *OrigFormatExpr,
|
||||||
// CHECK: Does the number of format conversions exceed the number
|
// CHECK: Does the number of format conversions exceed the number
|
||||||
// of data arguments?
|
// of data arguments?
|
||||||
if (numConversions > numDataArgs) {
|
if (numConversions > numDataArgs) {
|
||||||
SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(),
|
SourceLocation Loc =
|
||||||
LastConversionIdx);
|
getLocationOfStringLiteralByte(FExpr, LastConversionIdx);
|
||||||
|
|
||||||
Diag(Loc, diag::warn_printf_insufficient_data_args)
|
Diag(Loc, diag::warn_printf_insufficient_data_args)
|
||||||
<< OrigFormatExpr->getSourceRange();
|
<< OrigFormatExpr->getSourceRange();
|
||||||
|
|
|
@ -1,7 +1,19 @@
|
||||||
// RUN: clang %s -fsyntax-only -verify
|
// RUN: clang %s -fsyntax-only -verify
|
||||||
|
|
||||||
// rdar://6597252
|
// rdar://6597252
|
||||||
Class foo(Class X) {
|
Class test1(Class X) {
|
||||||
return 1 ? X : X;
|
return 1 ? X : X;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// rdar://6079877
|
||||||
|
// test2 - Each initializer below is a single string literal spelled as several
// adjacent tokens.  The NUL-character warning is expected on the source line
// of the token that actually contains the \0 escape.
void test2() {
|
||||||
|
// The \0 is in the second token ("bar\0").
id str = @"foo"
|
||||||
|
"bar\0" // expected-warning {{literal contains NUL character}}
|
||||||
|
@"baz" " blarg";
|
||||||
|
// The \0 is in the final token (" b\0larg").
id str2 = @"foo"
|
||||||
|
"bar"
|
||||||
|
@"baz"
|
||||||
|
" b\0larg"; // expected-warning {{literal contains NUL character}}
|
||||||
|

||||||
|
}
|
Loading…
Reference in New Issue