forked from OSchip/llvm-project
Start improving diagnostics that relate to subcharacters of string literals.
First step: handle diagnostics in StringLiterals that are due to token pasting. For example, we now handle:

  id str2 = @"foo"
            "bar"
            @"baz"
            " b\0larg";  // expected-warning {{literal contains NUL character}}

correctly:

test/SemaObjC/exprs.m:17:15: warning: CFString literal contains NUL character
           " b\0larg";  // expected-warning {{literal contains NUL character}}
           ~~~^~~~~~~

There are several other related issues still to be done.

llvm-svn: 64924
This commit is contained in:
parent
6add6181c8
commit
a26fb347a0
|
@ -520,7 +520,6 @@ public:
|
|||
typedef const SourceLocation *tokloc_iterator;
|
||||
/// tokloc_begin - Return an iterator (a const SourceLocation pointer) to the
/// first entry of the TokLocs array.
tokloc_iterator tokloc_begin() const { return TokLocs; }
|
||||
/// tokloc_end - Return the past-the-end iterator for the TokLocs array, i.e.
/// TokLocs advanced by NumConcatenated entries.
tokloc_iterator tokloc_end() const { return TokLocs+NumConcatenated; }
|
||||
|
||||
|
||||
virtual SourceRange getSourceRange() const {
|
||||
return SourceRange(TokLocs[0], TokLocs[NumConcatenated-1]);
|
||||
|
|
|
@ -1989,6 +1989,8 @@ public:
|
|||
private:
|
||||
Action::OwningExprResult CheckFunctionCall(FunctionDecl *FDecl,
|
||||
CallExpr *TheCall);
|
||||
SourceLocation getLocationOfStringLiteralByte(const StringLiteral *SL,
|
||||
unsigned ByteNo) const;
|
||||
bool CheckObjCString(Expr *Arg);
|
||||
bool SemaBuiltinVAStart(CallExpr *TheCall);
|
||||
bool SemaBuiltinUnorderedCompare(CallExpr *TheCall);
|
||||
|
|
|
@ -20,6 +20,71 @@
|
|||
#include "clang/Lex/Preprocessor.h"
|
||||
using namespace clang;
|
||||
|
||||
/// getLocationOfStringLiteralByte - Return a source location that points to the
|
||||
/// specified byte of the specified string literal.
|
||||
///
|
||||
/// Strings are amazingly complex. They can be formed from multiple tokens and
|
||||
/// can have escape sequences in them in addition to the usual trigraph and
|
||||
/// escaped newline business. This routine handles this complexity.
|
||||
///
|
||||
SourceLocation Sema::getLocationOfStringLiteralByte(const StringLiteral *SL,
|
||||
unsigned ByteNo) const {
|
||||
assert(!SL->isWide() && "This doesn't work for wide strings yet");
|
||||
|
||||
// Loop over all of the tokens in this string until we find the one that
|
||||
// contains the byte we're looking for.
|
||||
unsigned TokNo = 0;
|
||||
while (1) {
|
||||
assert(TokNo < SL->getNumConcatenated() && "Invalid byte number!");
|
||||
SourceLocation StrTokLoc = SL->getStrTokenLoc(TokNo);
|
||||
|
||||
// Get the spelling of the string so that we can get the data that makes up
|
||||
// the string literal, not the identifier for the macro it is potentially
|
||||
// expanded through.
|
||||
SourceLocation StrTokSpellingLoc = SourceMgr.getSpellingLoc(StrTokLoc);
|
||||
|
||||
// Re-lex the token to get its length and original spelling.
|
||||
std::pair<FileID, unsigned> LocInfo =
|
||||
SourceMgr.getDecomposedLoc(StrTokSpellingLoc);
|
||||
std::pair<const char *,const char *> Buffer =
|
||||
SourceMgr.getBufferData(LocInfo.first);
|
||||
const char *StrData = Buffer.first+LocInfo.second;
|
||||
|
||||
// Create a langops struct and enable trigraphs. This is sufficient for
|
||||
// relexing tokens.
|
||||
LangOptions LangOpts;
|
||||
LangOpts.Trigraphs = true;
|
||||
|
||||
// Create a lexer starting at the beginning of this token.
|
||||
Lexer TheLexer(StrTokSpellingLoc, LangOpts, Buffer.first, StrData,
|
||||
Buffer.second);
|
||||
Token TheTok;
|
||||
TheLexer.LexFromRawLexer(TheTok);
|
||||
|
||||
// The length of the string is the token length minus the two quotes.
|
||||
unsigned TokNumBytes = TheTok.getLength()-2;
|
||||
|
||||
// If we found the token we're looking for, return the location.
|
||||
// FIXME: This should consider character escapes!
|
||||
if (ByteNo < TokNumBytes ||
|
||||
(ByteNo == TokNumBytes && TokNo == SL->getNumConcatenated())) {
|
||||
// If the original token came from a macro expansion, just return the
|
||||
// start of the token. We don't want to magically jump to the spelling
|
||||
// for a diagnostic. We do the above business in case some tokens come
|
||||
// from a macro expansion but others don't.
|
||||
if (!StrTokLoc.isFileID()) return StrTokLoc;
|
||||
|
||||
// We advance +1 to step over the '"'.
|
||||
return PP.AdvanceToTokenCharacter(StrTokLoc, ByteNo+1);
|
||||
}
|
||||
|
||||
// Move to the next string token.
|
||||
++TokNo;
|
||||
ByteNo -= TokNumBytes;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/// CheckFunctionCall - Check a direct function call for various correctness
|
||||
/// and safety properties not strictly enforced by the C type system.
|
||||
Action::OwningExprResult
|
||||
|
@ -108,14 +173,14 @@ bool Sema::CheckObjCString(Expr *Arg) {
|
|||
|
||||
for (unsigned i = 0; i < Length; ++i) {
|
||||
if (!isascii(Data[i])) {
|
||||
Diag(PP.AdvanceToTokenCharacter(Arg->getLocStart(), i + 1),
|
||||
Diag(getLocationOfStringLiteralByte(Literal, i),
|
||||
diag::warn_cfstring_literal_contains_non_ascii_character)
|
||||
<< Arg->getSourceRange();
|
||||
break;
|
||||
}
|
||||
|
||||
if (!Data[i]) {
|
||||
Diag(PP.AdvanceToTokenCharacter(Arg->getLocStart(), i + 1),
|
||||
Diag(getLocationOfStringLiteralByte(Literal, i),
|
||||
diag::warn_cfstring_literal_contains_nul_character)
|
||||
<< Arg->getSourceRange();
|
||||
break;
|
||||
|
@ -565,7 +630,7 @@ void Sema::CheckPrintfString(StringLiteral *FExpr, Expr *OrigFormatExpr,
|
|||
if (Str[StrIdx] == '\0') {
|
||||
// The string returned by getStrData() is not null-terminated,
|
||||
// so the presence of a null character is likely an error.
|
||||
Diag(PP.AdvanceToTokenCharacter(FExpr->getLocStart(), StrIdx+1),
|
||||
Diag(getLocationOfStringLiteralByte(FExpr, StrIdx),
|
||||
diag::warn_printf_format_string_contains_null_char)
|
||||
<< OrigFormatExpr->getSourceRange();
|
||||
return;
|
||||
|
@ -587,8 +652,7 @@ void Sema::CheckPrintfString(StringLiteral *FExpr, Expr *OrigFormatExpr,
|
|||
++numConversions;
|
||||
|
||||
if (!HasVAListArg && numConversions > numDataArgs) {
|
||||
SourceLocation Loc = FExpr->getLocStart();
|
||||
Loc = PP.AdvanceToTokenCharacter(Loc, StrIdx+1);
|
||||
SourceLocation Loc = getLocationOfStringLiteralByte(FExpr, StrIdx);
|
||||
|
||||
if (Str[StrIdx-1] == '.')
|
||||
Diag(Loc, diag::warn_printf_asterisk_precision_missing_arg)
|
||||
|
@ -607,8 +671,7 @@ void Sema::CheckPrintfString(StringLiteral *FExpr, Expr *OrigFormatExpr,
|
|||
if (BT->getKind() == BuiltinType::Int)
|
||||
break;
|
||||
|
||||
SourceLocation Loc =
|
||||
PP.AdvanceToTokenCharacter(FExpr->getLocStart(), StrIdx+1);
|
||||
SourceLocation Loc = getLocationOfStringLiteralByte(FExpr, StrIdx);
|
||||
|
||||
if (Str[StrIdx-1] == '.')
|
||||
Diag(Loc, diag::warn_printf_asterisk_precision_wrong_type)
|
||||
|
@ -655,8 +718,8 @@ void Sema::CheckPrintfString(StringLiteral *FExpr, Expr *OrigFormatExpr,
|
|||
case 'n': {
|
||||
++numConversions;
|
||||
CurrentState = state_OrdChr;
|
||||
SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(),
|
||||
LastConversionIdx+1);
|
||||
SourceLocation Loc = getLocationOfStringLiteralByte(FExpr,
|
||||
LastConversionIdx);
|
||||
|
||||
Diag(Loc, diag::warn_printf_write_back)<<OrigFormatExpr->getSourceRange();
|
||||
break;
|
||||
|
@ -669,8 +732,8 @@ void Sema::CheckPrintfString(StringLiteral *FExpr, Expr *OrigFormatExpr,
|
|||
CurrentState = state_OrdChr;
|
||||
else {
|
||||
// Issue a warning: invalid format conversion.
|
||||
SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(),
|
||||
LastConversionIdx+1);
|
||||
SourceLocation Loc =
|
||||
getLocationOfStringLiteralByte(FExpr, LastConversionIdx);
|
||||
|
||||
Diag(Loc, diag::warn_printf_invalid_conversion)
|
||||
<< std::string(Str+LastConversionIdx,
|
||||
|
@ -690,8 +753,8 @@ void Sema::CheckPrintfString(StringLiteral *FExpr, Expr *OrigFormatExpr,
|
|||
CurrentState = state_OrdChr;
|
||||
else {
|
||||
// Issue a warning: invalid format conversion.
|
||||
SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(),
|
||||
LastConversionIdx+1);
|
||||
SourceLocation Loc =
|
||||
getLocationOfStringLiteralByte(FExpr, LastConversionIdx);
|
||||
|
||||
Diag(Loc, diag::warn_printf_invalid_conversion)
|
||||
<< std::string(Str+LastConversionIdx, Str+StrIdx)
|
||||
|
@ -713,8 +776,8 @@ void Sema::CheckPrintfString(StringLiteral *FExpr, Expr *OrigFormatExpr,
|
|||
|
||||
if (CurrentState == state_Conversion) {
|
||||
// Issue a warning: invalid format conversion.
|
||||
SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(),
|
||||
LastConversionIdx+1);
|
||||
SourceLocation Loc =
|
||||
getLocationOfStringLiteralByte(FExpr, LastConversionIdx);
|
||||
|
||||
Diag(Loc, diag::warn_printf_invalid_conversion)
|
||||
<< std::string(Str+LastConversionIdx,
|
||||
|
@ -727,8 +790,8 @@ void Sema::CheckPrintfString(StringLiteral *FExpr, Expr *OrigFormatExpr,
|
|||
// CHECK: Does the number of format conversions exceed the number
|
||||
// of data arguments?
|
||||
if (numConversions > numDataArgs) {
|
||||
SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(),
|
||||
LastConversionIdx);
|
||||
SourceLocation Loc =
|
||||
getLocationOfStringLiteralByte(FExpr, LastConversionIdx);
|
||||
|
||||
Diag(Loc, diag::warn_printf_insufficient_data_args)
|
||||
<< OrigFormatExpr->getSourceRange();
|
||||
|
|
|
@ -1,7 +1,19 @@
|
|||
// RUN: clang %s -fsyntax-only -verify
|
||||
|
||||
// rdar://6597252
|
||||
Class foo(Class X) {
|
||||
Class test1(Class X) {
|
||||
return 1 ? X : X;
|
||||
}
|
||||
|
||||
|
||||
// rdar://6079877
|
||||
// test2 - Builds two string literals out of several adjacent string tokens,
// each with an embedded \0 in a token other than the first one.  The
// expected-warning annotations sit on the line of the token that contains the
// NUL, so -verify checks that the diagnostic is reported on that line.
void test2() {
|
||||
id str = @"foo"
|
||||
"bar\0" // expected-warning {{literal contains NUL character}}
|
||||
@"baz" " blarg";
|
||||
id str2 = @"foo"
|
||||
"bar"
|
||||
@"baz"
|
||||
" b\0larg"; // expected-warning {{literal contains NUL character}}
|
||||

||||
}
|
Loading…
Reference in New Issue