Start improving diagnostics that relate to subcharacters of string literals.

First step: handle diagnostics in StringLiterals that are formed by token pasting.

For example, we now handle:
  id str2 = @"foo" 
            "bar"
           @"baz"
           " b\0larg";  // expected-warning {{literal contains NUL character}}

Correctly:

test/SemaObjC/exprs.m:17:15: warning: CFString literal contains NUL character
           " b\0larg";  // expected-warning {{literal contains NUL character}}
           ~~~^~~~~~~

There are several other related issues still to be done.

llvm-svn: 64924
This commit is contained in:
Chris Lattner 2009-02-18 17:49:48 +00:00
parent 6add6181c8
commit a26fb347a0
4 changed files with 95 additions and 19 deletions

View File

@ -520,7 +520,6 @@ public:
typedef const SourceLocation *tokloc_iterator; typedef const SourceLocation *tokloc_iterator;
tokloc_iterator tokloc_begin() const { return TokLocs; } tokloc_iterator tokloc_begin() const { return TokLocs; }
tokloc_iterator tokloc_end() const { return TokLocs+NumConcatenated; } tokloc_iterator tokloc_end() const { return TokLocs+NumConcatenated; }
virtual SourceRange getSourceRange() const { virtual SourceRange getSourceRange() const {
return SourceRange(TokLocs[0], TokLocs[NumConcatenated-1]); return SourceRange(TokLocs[0], TokLocs[NumConcatenated-1]);

View File

@ -1989,6 +1989,8 @@ public:
private: private:
Action::OwningExprResult CheckFunctionCall(FunctionDecl *FDecl, Action::OwningExprResult CheckFunctionCall(FunctionDecl *FDecl,
CallExpr *TheCall); CallExpr *TheCall);
SourceLocation getLocationOfStringLiteralByte(const StringLiteral *SL,
unsigned ByteNo) const;
bool CheckObjCString(Expr *Arg); bool CheckObjCString(Expr *Arg);
bool SemaBuiltinVAStart(CallExpr *TheCall); bool SemaBuiltinVAStart(CallExpr *TheCall);
bool SemaBuiltinUnorderedCompare(CallExpr *TheCall); bool SemaBuiltinUnorderedCompare(CallExpr *TheCall);

View File

@ -20,6 +20,71 @@
#include "clang/Lex/Preprocessor.h" #include "clang/Lex/Preprocessor.h"
using namespace clang; using namespace clang;
/// getLocationOfStringLiteralByte - Return a source location that points to the
/// specified byte of the specified string literal.
///
/// Strings are amazingly complex.  They can be formed from multiple tokens and
/// can have escape sequences in them in addition to the usual trigraph and
/// escaped newline business.  This routine walks the concatenated tokens of
/// SL, re-lexing each one to learn its length, until it finds the token that
/// contains byte ByteNo (counted from the start of the whole literal).
///
/// ByteNo may also be the one-past-the-end offset of the literal, in which
/// case the returned location is derived from the last token.
///
/// Limitations: wide strings are rejected by an assertion, and character
/// escapes are not decoded yet, so each token is assumed to contribute
/// "spelling length minus the two quotes" bytes (see the FIXME below).
///
SourceLocation Sema::getLocationOfStringLiteralByte(const StringLiteral *SL,
                                                    unsigned ByteNo) const {
  assert(!SL->isWide() && "This doesn't work for wide strings yet");

  // Create a langops struct and enable trigraphs.  This is sufficient for
  // relexing tokens, and is the same for every token, so build it once.
  LangOptions LangOpts;
  LangOpts.Trigraphs = true;

  const unsigned NumToks = SL->getNumConcatenated();

  // Loop over all of the tokens in this string until we find the one that
  // contains the byte we're looking for.
  unsigned TokNo = 0;
  while (1) {
    assert(TokNo < NumToks && "Invalid byte number!");

    SourceLocation StrTokLoc = SL->getStrTokenLoc(TokNo);

    // Get the spelling of the string so that we can get the data that makes up
    // the string literal, not the identifier for the macro it is potentially
    // expanded through.
    SourceLocation StrTokSpellingLoc = SourceMgr.getSpellingLoc(StrTokLoc);

    // Re-lex the token to get its length and original spelling.
    std::pair<FileID, unsigned> LocInfo =
      SourceMgr.getDecomposedLoc(StrTokSpellingLoc);
    std::pair<const char *,const char *> Buffer =
      SourceMgr.getBufferData(LocInfo.first);
    const char *StrData = Buffer.first+LocInfo.second;

    // Create a lexer starting at the beginning of this token.
    Lexer TheLexer(StrTokSpellingLoc, LangOpts, Buffer.first, StrData,
                   Buffer.second);
    Token TheTok;
    TheLexer.LexFromRawLexer(TheTok);

    // The length of the string is the token length minus the two quotes.
    // Guard the subtraction so a malformed token cannot wrap around.
    assert(TheTok.getLength() >= 2 && "String token is missing its quotes!");
    unsigned TokNumBytes = TheTok.getLength()-2;

    // If we found the token we're looking for, return the location.  The
    // one-past-the-end byte is attributed to the *last* token, whose index is
    // NumToks-1 (TokNo can never equal NumToks here because of the assert
    // at the top of the loop).
    // FIXME: This should consider character escapes!
    if (ByteNo < TokNumBytes ||
        (ByteNo == TokNumBytes && TokNo == NumToks-1)) {
      // If the original token came from a macro expansion, just return the
      // start of the token.  We don't want to magically jump to the spelling
      // for a diagnostic.  We do the above business in case some tokens come
      // from a macro expansion but others don't.
      if (!StrTokLoc.isFileID()) return StrTokLoc;

      // We advance +1 to step over the '"'.
      return PP.AdvanceToTokenCharacter(StrTokLoc, ByteNo+1);
    }

    // Move to the next string token, rebasing ByteNo so it is relative to the
    // start of that token.
    ++TokNo;
    ByteNo -= TokNumBytes;
  }
}
/// CheckFunctionCall - Check a direct function call for various correctness /// CheckFunctionCall - Check a direct function call for various correctness
/// and safety properties not strictly enforced by the C type system. /// and safety properties not strictly enforced by the C type system.
Action::OwningExprResult Action::OwningExprResult
@ -108,14 +173,14 @@ bool Sema::CheckObjCString(Expr *Arg) {
for (unsigned i = 0; i < Length; ++i) { for (unsigned i = 0; i < Length; ++i) {
if (!isascii(Data[i])) { if (!isascii(Data[i])) {
Diag(PP.AdvanceToTokenCharacter(Arg->getLocStart(), i + 1), Diag(getLocationOfStringLiteralByte(Literal, i),
diag::warn_cfstring_literal_contains_non_ascii_character) diag::warn_cfstring_literal_contains_non_ascii_character)
<< Arg->getSourceRange(); << Arg->getSourceRange();
break; break;
} }
if (!Data[i]) { if (!Data[i]) {
Diag(PP.AdvanceToTokenCharacter(Arg->getLocStart(), i + 1), Diag(getLocationOfStringLiteralByte(Literal, i),
diag::warn_cfstring_literal_contains_nul_character) diag::warn_cfstring_literal_contains_nul_character)
<< Arg->getSourceRange(); << Arg->getSourceRange();
break; break;
@ -565,7 +630,7 @@ void Sema::CheckPrintfString(StringLiteral *FExpr, Expr *OrigFormatExpr,
if (Str[StrIdx] == '\0') { if (Str[StrIdx] == '\0') {
// The string returned by getStrData() is not null-terminated, // The string returned by getStrData() is not null-terminated,
// so the presence of a null character is likely an error. // so the presence of a null character is likely an error.
Diag(PP.AdvanceToTokenCharacter(FExpr->getLocStart(), StrIdx+1), Diag(getLocationOfStringLiteralByte(FExpr, StrIdx),
diag::warn_printf_format_string_contains_null_char) diag::warn_printf_format_string_contains_null_char)
<< OrigFormatExpr->getSourceRange(); << OrigFormatExpr->getSourceRange();
return; return;
@ -587,8 +652,7 @@ void Sema::CheckPrintfString(StringLiteral *FExpr, Expr *OrigFormatExpr,
++numConversions; ++numConversions;
if (!HasVAListArg && numConversions > numDataArgs) { if (!HasVAListArg && numConversions > numDataArgs) {
SourceLocation Loc = FExpr->getLocStart(); SourceLocation Loc = getLocationOfStringLiteralByte(FExpr, StrIdx);
Loc = PP.AdvanceToTokenCharacter(Loc, StrIdx+1);
if (Str[StrIdx-1] == '.') if (Str[StrIdx-1] == '.')
Diag(Loc, diag::warn_printf_asterisk_precision_missing_arg) Diag(Loc, diag::warn_printf_asterisk_precision_missing_arg)
@ -607,8 +671,7 @@ void Sema::CheckPrintfString(StringLiteral *FExpr, Expr *OrigFormatExpr,
if (BT->getKind() == BuiltinType::Int) if (BT->getKind() == BuiltinType::Int)
break; break;
SourceLocation Loc = SourceLocation Loc = getLocationOfStringLiteralByte(FExpr, StrIdx);
PP.AdvanceToTokenCharacter(FExpr->getLocStart(), StrIdx+1);
if (Str[StrIdx-1] == '.') if (Str[StrIdx-1] == '.')
Diag(Loc, diag::warn_printf_asterisk_precision_wrong_type) Diag(Loc, diag::warn_printf_asterisk_precision_wrong_type)
@ -655,8 +718,8 @@ void Sema::CheckPrintfString(StringLiteral *FExpr, Expr *OrigFormatExpr,
case 'n': { case 'n': {
++numConversions; ++numConversions;
CurrentState = state_OrdChr; CurrentState = state_OrdChr;
SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(), SourceLocation Loc = getLocationOfStringLiteralByte(FExpr,
LastConversionIdx+1); LastConversionIdx);
Diag(Loc, diag::warn_printf_write_back)<<OrigFormatExpr->getSourceRange(); Diag(Loc, diag::warn_printf_write_back)<<OrigFormatExpr->getSourceRange();
break; break;
@ -669,8 +732,8 @@ void Sema::CheckPrintfString(StringLiteral *FExpr, Expr *OrigFormatExpr,
CurrentState = state_OrdChr; CurrentState = state_OrdChr;
else { else {
// Issue a warning: invalid format conversion. // Issue a warning: invalid format conversion.
SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(), SourceLocation Loc =
LastConversionIdx+1); getLocationOfStringLiteralByte(FExpr, LastConversionIdx);
Diag(Loc, diag::warn_printf_invalid_conversion) Diag(Loc, diag::warn_printf_invalid_conversion)
<< std::string(Str+LastConversionIdx, << std::string(Str+LastConversionIdx,
@ -690,8 +753,8 @@ void Sema::CheckPrintfString(StringLiteral *FExpr, Expr *OrigFormatExpr,
CurrentState = state_OrdChr; CurrentState = state_OrdChr;
else { else {
// Issue a warning: invalid format conversion. // Issue a warning: invalid format conversion.
SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(), SourceLocation Loc =
LastConversionIdx+1); getLocationOfStringLiteralByte(FExpr, LastConversionIdx);
Diag(Loc, diag::warn_printf_invalid_conversion) Diag(Loc, diag::warn_printf_invalid_conversion)
<< std::string(Str+LastConversionIdx, Str+StrIdx) << std::string(Str+LastConversionIdx, Str+StrIdx)
@ -713,8 +776,8 @@ void Sema::CheckPrintfString(StringLiteral *FExpr, Expr *OrigFormatExpr,
if (CurrentState == state_Conversion) { if (CurrentState == state_Conversion) {
// Issue a warning: invalid format conversion. // Issue a warning: invalid format conversion.
SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(), SourceLocation Loc =
LastConversionIdx+1); getLocationOfStringLiteralByte(FExpr, LastConversionIdx);
Diag(Loc, diag::warn_printf_invalid_conversion) Diag(Loc, diag::warn_printf_invalid_conversion)
<< std::string(Str+LastConversionIdx, << std::string(Str+LastConversionIdx,
@ -727,8 +790,8 @@ void Sema::CheckPrintfString(StringLiteral *FExpr, Expr *OrigFormatExpr,
// CHECK: Does the number of format conversions exceed the number // CHECK: Does the number of format conversions exceed the number
// of data arguments? // of data arguments?
if (numConversions > numDataArgs) { if (numConversions > numDataArgs) {
SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(), SourceLocation Loc =
LastConversionIdx); getLocationOfStringLiteralByte(FExpr, LastConversionIdx);
Diag(Loc, diag::warn_printf_insufficient_data_args) Diag(Loc, diag::warn_printf_insufficient_data_args)
<< OrigFormatExpr->getSourceRange(); << OrigFormatExpr->getSourceRange();

View File

@ -1,7 +1,19 @@
// RUN: clang %s -fsyntax-only -verify // RUN: clang %s -fsyntax-only -verify
// rdar://6597252 // rdar://6597252
Class foo(Class X) { Class test1(Class X) {
return 1 ? X : X; return 1 ? X : X;
} }
// rdar://6079877
// Checks that the NUL-character warning is issued for string literals built
// from several adjacent tokens, with the diagnostic directive placed on the
// line of the token that actually contains the "\0".
void test2() {
// Case 1: the NUL is in the second token; the remaining tokens follow on the
// next line.
id str = @"foo"
"bar\0" // expected-warning {{literal contains NUL character}}
@"baz" " blarg";
// Case 2: one token per line, with the NUL in the middle of the final token.
id str2 = @"foo"
"bar"
@"baz"
" b\0larg"; // expected-warning {{literal contains NUL character}}
}