Start improving diagnostics that relate to subcharacters of string literals.

First step: handle diagnostics in StringLiterals that are due to token pasting. For example, we now handle:

  id str2 = @"foo"
            "bar"
           @"baz"
           " b\0larg";  // expected-warning {{literal contains NUL character}}

Correctly:

test/SemaObjC/exprs.m:17:15: warning: CFString literal contains NUL character
           " b\0larg";  // expected-warning {{literal contains NUL character}}
           ~~~^~~~~~~

There are several other related issues still to be done.

llvm-svn: 64924
2009-02-18 17:49:48 +00:00 · 2009-02-18 17:49:48 +00:00 · a26fb347a0
parent 6add6181c8
commit a26fb347a0
4 changed files with 95 additions and 19 deletions
--- a/clang/include/clang/AST/Expr.h
+++ b/clang/include/clang/AST/Expr.h
@ -520,7 +520,6 @@ public:
  typedef const SourceLocation *tokloc_iterator;
  tokloc_iterator tokloc_begin() const { return TokLocs; }
  tokloc_iterator tokloc_end() const { return TokLocs+NumConcatenated; }
  virtual SourceRange getSourceRange() const { 
    return SourceRange(TokLocs[0], TokLocs[NumConcatenated-1]); 
--- a/clang/lib/Sema/Sema.h
+++ b/clang/lib/Sema/Sema.h
@ -1989,6 +1989,8 @@ public:
 private:
  Action::OwningExprResult CheckFunctionCall(FunctionDecl *FDecl,
                                             CallExpr *TheCall);
  SourceLocation getLocationOfStringLiteralByte(const StringLiteral *SL,
                                                unsigned ByteNo) const;
  bool CheckObjCString(Expr *Arg);
  bool SemaBuiltinVAStart(CallExpr *TheCall);
  bool SemaBuiltinUnorderedCompare(CallExpr *TheCall);
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@ -20,6 +20,71 @@
 #include "clang/Lex/Preprocessor.h"
 using namespace clang;
 /// getLocationOfStringLiteralByte - Return a source location that points to the
 /// specified byte of the specified string literal.
 ///
 /// Strings are amazingly complex.  They can be formed from multiple tokens and
 /// can have escape sequences in them in addition to the usual trigraph and
 /// escaped newline business.  This routine handles this complexity.
 ///
 /// \param SL      The string literal to index into.  Must not be a wide string.
 /// \param ByteNo  Zero-based byte index into the literal.  Bytes are counted
 ///                against the spelled contents of each concatenated token
 ///                (token length minus the two quotes); character escapes are
 ///                not yet accounted for.  An index equal to the number of
 ///                bytes remaining in the final token is accepted and refers
 ///                to the position just past that token's last byte.
 /// \returns The location of the requested byte, or the start of the containing
 ///          token when that token comes from a macro expansion.
 ///
 SourceLocation Sema::getLocationOfStringLiteralByte(const StringLiteral *SL,
                                                     unsigned ByteNo) const {
  assert(!SL->isWide() && "This doesn't work for wide strings yet");

  // Loop over all of the tokens in this string until we find the one that
  // contains the byte we're looking for.
  unsigned TokNo = 0;
  while (true) {
    assert(TokNo < SL->getNumConcatenated() && "Invalid byte number!");
    SourceLocation StrTokLoc = SL->getStrTokenLoc(TokNo);

    // Get the spelling of the string so that we can get the data that makes up
    // the string literal, not the identifier for the macro it is potentially
    // expanded through.
    SourceLocation StrTokSpellingLoc = SourceMgr.getSpellingLoc(StrTokLoc);

    // Re-lex the token to get its length and original spelling.
    std::pair<FileID, unsigned> LocInfo =
      SourceMgr.getDecomposedLoc(StrTokSpellingLoc);
    std::pair<const char *,const char *> Buffer =
      SourceMgr.getBufferData(LocInfo.first);
    const char *StrData = Buffer.first+LocInfo.second;

    // Create a langops struct and enable trigraphs.  This is sufficient for
    // relexing tokens.
    LangOptions LangOpts;
    LangOpts.Trigraphs = true;

    // Create a lexer starting at the beginning of this token.
    Lexer TheLexer(StrTokSpellingLoc, LangOpts, Buffer.first, StrData,
                   Buffer.second);
    Token TheTok;
    TheLexer.LexFromRawLexer(TheTok);

    // The length of the string is the token length minus the two quotes.
    unsigned TokNumBytes = TheTok.getLength()-2;

    // If we found the token we're looking for, return the location.  The
    // one-past-the-end byte is only attributed to the *last* token: its index
    // is getNumConcatenated()-1 (the assert above guarantees TokNo never
    // reaches getNumConcatenated(), so comparing against that value could
    // never succeed).
    // FIXME: This should consider character escapes!
    if (ByteNo < TokNumBytes ||
        (ByteNo == TokNumBytes && TokNo == SL->getNumConcatenated()-1)) {
      // If the original token came from a macro expansion, just return the
      // start of the token.  We don't want to magically jump to the spelling
      // for a diagnostic.  We do the above business in case some tokens come
      // from a macro expansion but others don't.
      if (!StrTokLoc.isFileID()) return StrTokLoc;

      // We advance +1 to step over the '"'.
      return PP.AdvanceToTokenCharacter(StrTokLoc, ByteNo+1);
    }

    // Move to the next string token, rebasing the byte index so it is
    // relative to the start of that token's contents.
    ++TokNo;
    ByteNo -= TokNumBytes;
  }
 }
 /// CheckFunctionCall - Check a direct function call for various correctness
 /// and safety properties not strictly enforced by the C type system.
 Action::OwningExprResult
@ -108,14 +173,14 @@ bool Sema::CheckObjCString(Expr *Arg) {
  for (unsigned i = 0; i < Length; ++i) {
    if (!isascii(Data[i])) {
-      Diag(PP.AdvanceToTokenCharacter(Arg->getLocStart(), i + 1),
+      Diag(getLocationOfStringLiteralByte(Literal, i),
           diag::warn_cfstring_literal_contains_non_ascii_character)
        << Arg->getSourceRange();
      break;
    }
    if (!Data[i]) {
-      Diag(PP.AdvanceToTokenCharacter(Arg->getLocStart(), i + 1),
+      Diag(getLocationOfStringLiteralByte(Literal, i),
           diag::warn_cfstring_literal_contains_nul_character)
        << Arg->getSourceRange();
      break;
@ -565,7 +630,7 @@ void Sema::CheckPrintfString(StringLiteral *FExpr, Expr *OrigFormatExpr,
    if (Str[StrIdx] == '\0') {
      // The string returned by getStrData() is not null-terminated,
      // so the presence of a null character is likely an error.
-      Diag(PP.AdvanceToTokenCharacter(FExpr->getLocStart(), StrIdx+1),
+      Diag(getLocationOfStringLiteralByte(FExpr, StrIdx),
           diag::warn_printf_format_string_contains_null_char)
        <<  OrigFormatExpr->getSourceRange();
      return;
@ -587,8 +652,7 @@ void Sema::CheckPrintfString(StringLiteral *FExpr, Expr *OrigFormatExpr,
      ++numConversions;
      if (!HasVAListArg && numConversions > numDataArgs) {
-        SourceLocation Loc = FExpr->getLocStart();
+        SourceLocation Loc = getLocationOfStringLiteralByte(FExpr, StrIdx);
        Loc = PP.AdvanceToTokenCharacter(Loc, StrIdx+1);
        if (Str[StrIdx-1] == '.')
          Diag(Loc, diag::warn_printf_asterisk_precision_missing_arg)
@ -607,8 +671,7 @@ void Sema::CheckPrintfString(StringLiteral *FExpr, Expr *OrigFormatExpr,
        if (BT->getKind() == BuiltinType::Int)
          break;
-      SourceLocation Loc =
+      SourceLocation Loc = getLocationOfStringLiteralByte(FExpr, StrIdx);
        PP.AdvanceToTokenCharacter(FExpr->getLocStart(), StrIdx+1);
      if (Str[StrIdx-1] == '.')
        Diag(Loc, diag::warn_printf_asterisk_precision_wrong_type)
@ -655,8 +718,8 @@ void Sema::CheckPrintfString(StringLiteral *FExpr, Expr *OrigFormatExpr,
    case 'n': {
      ++numConversions;
      CurrentState = state_OrdChr;
-      SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(),
+      SourceLocation Loc = getLocationOfStringLiteralByte(FExpr,
-                                                      LastConversionIdx+1);
+                                                          LastConversionIdx);
      Diag(Loc, diag::warn_printf_write_back)<<OrigFormatExpr->getSourceRange();
      break;
@ -669,8 +732,8 @@ void Sema::CheckPrintfString(StringLiteral *FExpr, Expr *OrigFormatExpr,
        CurrentState = state_OrdChr; 
      else {
        // Issue a warning: invalid format conversion.
-        SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(),
+        SourceLocation Loc = 
-                                                    LastConversionIdx+1);
+          getLocationOfStringLiteralByte(FExpr, LastConversionIdx);
        Diag(Loc, diag::warn_printf_invalid_conversion)
          <<  std::string(Str+LastConversionIdx,
@ -690,8 +753,8 @@ void Sema::CheckPrintfString(StringLiteral *FExpr, Expr *OrigFormatExpr,
        CurrentState = state_OrdChr; 
      else {
        // Issue a warning: invalid format conversion.
-        SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(),
+        SourceLocation Loc =
-                                                        LastConversionIdx+1);
+          getLocationOfStringLiteralByte(FExpr, LastConversionIdx);
        Diag(Loc, diag::warn_printf_invalid_conversion)
          << std::string(Str+LastConversionIdx, Str+StrIdx)
@ -713,8 +776,8 @@ void Sema::CheckPrintfString(StringLiteral *FExpr, Expr *OrigFormatExpr,
  if (CurrentState == state_Conversion) {
    // Issue a warning: invalid format conversion.
-    SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(),
+    SourceLocation Loc =
-                                                    LastConversionIdx+1);
+      getLocationOfStringLiteralByte(FExpr, LastConversionIdx);
    Diag(Loc, diag::warn_printf_invalid_conversion)
      << std::string(Str+LastConversionIdx,
@ -727,8 +790,8 @@ void Sema::CheckPrintfString(StringLiteral *FExpr, Expr *OrigFormatExpr,
    // CHECK: Does the number of format conversions exceed the number
    //        of data arguments?
    if (numConversions > numDataArgs) {
-      SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(),
+      SourceLocation Loc =
-                                                      LastConversionIdx);
+        getLocationOfStringLiteralByte(FExpr, LastConversionIdx);
      Diag(Loc, diag::warn_printf_insufficient_data_args)
        << OrigFormatExpr->getSourceRange();
--- a/clang/test/SemaObjC/exprs.m
+++ b/clang/test/SemaObjC/exprs.m
@ -1,7 +1,19 @@
 // RUN: clang %s -fsyntax-only -verify
 // rdar://6597252
-Class foo(Class X) {
+Class test1(Class X) {
  return 1 ? X : X;
 }
 // rdar://6079877
 // Checks the "literal contains NUL character" warning on Objective-C string
 // literals built from several adjacent string tokens.  Each expected-warning
 // annotation sits on the line of the token that contains the \0, in both a
 // middle token (str) and the last token (str2).
 void test2() {
  id str = @"foo" 
          "bar\0"    // expected-warning {{literal contains NUL character}}
          @"baz"  " blarg";
  id str2 = @"foo" 
            "bar"
           @"baz"
           " b\0larg";  // expected-warning {{literal contains NUL character}}
 }