a metric ton of refactoring later, Sema::getLocationOfStringLiteralByte

no longer depends on Preprocessor, so we can move it out of Sema into a nice new StringLiteral::getLocationOfByte method that can be used by any AST client. llvm-svn: 119481
2010-11-17 07:37:15 +00:00 · 2010-11-17 07:37:15 +00:00 · e925d61785
parent 35d023164c
commit e925d61785
3 changed files with 83 additions and 63 deletions
--- a/clang/include/clang/AST/Expr.h
+++ b/clang/include/clang/AST/Expr.h
@@ -1034,6 +1034,17 @@ public:
    assert(TokNum < NumConcatenated && "Invalid tok number");
    TokLocs[TokNum] = L;
  }
+  
+  /// getLocationOfByte - Return a source location that points to the specified
+  /// byte of this string literal.
+  ///
+  /// Strings are amazingly complex.  They can be formed from multiple tokens
+  /// and can have escape sequences in them in addition to the usual trigraph
+  /// and escaped newline business.  This routine handles this complexity.
+  ///
+  SourceLocation getLocationOfByte(unsigned ByteNo, const SourceManager &SM,
+                                   const LangOptions &Features,
+                                   const TargetInfo &Target) const;

  typedef const SourceLocation *tokloc_iterator;
  tokloc_iterator tokloc_begin() const { return TokLocs; }
--- a/clang/lib/AST/Expr.cpp
+++ b/clang/lib/AST/Expr.cpp
@@ -20,7 +20,10 @@
 #include "clang/AST/DeclTemplate.h"
 #include "clang/AST/RecordLayout.h"
 #include "clang/AST/StmtVisitor.h"
+#include "clang/Lex/LiteralSupport.h"
+#include "clang/Lex/Lexer.h"
 #include "clang/Basic/Builtins.h"
+#include "clang/Basic/SourceManager.h"
 #include "clang/Basic/TargetInfo.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
@@ -467,6 +470,72 @@ void StringLiteral::setString(ASTContext &C, llvm::StringRef Str) {
  ByteLength = Str.size();
 }

+/// getLocationOfByte - Return a source location that points to the specified
+/// byte of this string literal.
+///
+/// Strings are amazingly complex.  They can be formed from multiple tokens and
+/// can have escape sequences in them in addition to the usual trigraph and
+/// escaped newline business.  This routine handles this complexity.
+///
+/// ByteNo may equal the literal's byte length (one past the last byte of the
+/// final token).  If a token's buffer is invalid, that token's spelling
+/// location is returned.
+///
+SourceLocation StringLiteral::
+getLocationOfByte(unsigned ByteNo, const SourceManager &SM,
+                  const LangOptions &Features, const TargetInfo &Target) const {
+  assert(!isWide() && "This doesn't work for wide strings yet");
+
+  // Loop over all of the tokens in this string until we find the one that
+  // contains the byte we're looking for.
+  unsigned TokNo = 0;
+  while (1) {
+    assert(TokNo < getNumConcatenated() && "Invalid byte number!");
+    SourceLocation StrTokLoc = getStrTokenLoc(TokNo);
+
+    // Get the spelling of the string so that we can get the data that makes up
+    // the string literal, not the identifier for the macro it is potentially
+    // expanded through.
+    SourceLocation StrTokSpellingLoc = SM.getSpellingLoc(StrTokLoc);
+
+    // Re-lex the token to get its length and original spelling.
+    std::pair<FileID, unsigned> LocInfo = SM.getDecomposedLoc(StrTokSpellingLoc);
+    bool Invalid = false;
+    llvm::StringRef Buffer = SM.getBufferData(LocInfo.first, &Invalid);
+    if (Invalid)
+      return StrTokSpellingLoc;
+
+    const char *StrData = Buffer.data()+LocInfo.second;
+
+    // Create a lexer starting at the beginning of this token.
+    Lexer TheLexer(StrTokSpellingLoc, Features, Buffer.begin(), StrData,
+                   Buffer.end());
+    Token TheTok;
+    TheLexer.LexFromRawLexer(TheTok);
+
+    // Use the StringLiteralParser to compute the length of the string in bytes.
+    StringLiteralParser SLP(&TheTok, 1, SM, Features, Target);
+    unsigned TokNumBytes = SLP.GetStringLength();
+
+    // If the byte is in this token, return the location of the byte.  The
+    // one-past-the-end byte is only accepted on the last token.
+    if (ByteNo < TokNumBytes ||
+        (ByteNo == TokNumBytes && TokNo == getNumConcatenated() - 1)) {
+      unsigned Offset = SLP.getOffsetOfStringByte(TheTok, ByteNo);
+
+      // Now that we know the offset of the token in the spelling, use the
+      // lexer to get the offset in the original source.
+      return Lexer::AdvanceToTokenCharacter(StrTokLoc, Offset, SM, Features);
+    }
+
+    // Move to the next string token.
+    ++TokNo;
+    ByteNo -= TokNumBytes;
+  }
+}
+
+
+
 /// getOpcodeStr - Turn an Opcode enum value into the punctuation char it
 /// corresponds to, e.g. "sizeof" or "[pre]++".
 const char *UnaryOperator::getOpcodeStr(Opcode Op) {
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -25,7 +25,6 @@
 #include "clang/AST/DeclObjC.h"
 #include "clang/AST/StmtCXX.h"
 #include "clang/AST/StmtObjC.h"
-#include "clang/Lex/LiteralSupport.h"
 #include "clang/Lex/Preprocessor.h"
 #include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/STLExtras.h"
@@ -33,75 +32,16 @@
 #include "clang/Basic/TargetBuiltins.h"
 #include "clang/Basic/TargetInfo.h"
 #include "clang/Basic/ConvertUTF.h"
-
 #include <limits>
 using namespace clang;
 using namespace sema;

-/// getLocationOfStringLiteralByte - Return a source location that points to the
-/// specified byte of the specified string literal.
-///
-/// Strings are amazingly complex.  They can be formed from multiple tokens and
-/// can have escape sequences in them in addition to the usual trigraph and
-/// escaped newline business.  This routine handles this complexity.
-///
 SourceLocation Sema::getLocationOfStringLiteralByte(const StringLiteral *SL,
                                                    unsigned ByteNo) const {
-  assert(!SL->isWide() && "This doesn't work for wide strings yet");
-
-  // Loop over all of the tokens in this string until we find the one that
-  // contains the byte we're looking for.
-  unsigned TokNo = 0;
-  while (1) {
-    assert(TokNo < SL->getNumConcatenated() && "Invalid byte number!");
-    SourceLocation StrTokLoc = SL->getStrTokenLoc(TokNo);
-
-    // Get the spelling of the string so that we can get the data that makes up
-    // the string literal, not the identifier for the macro it is potentially
-    // expanded through.
-    SourceLocation StrTokSpellingLoc = SourceMgr.getSpellingLoc(StrTokLoc);
-
-    // Re-lex the token to get its length and original spelling.
-    std::pair<FileID, unsigned> LocInfo =
-      SourceMgr.getDecomposedLoc(StrTokSpellingLoc);
-    bool Invalid = false;
-    llvm::StringRef Buffer = SourceMgr.getBufferData(LocInfo.first, &Invalid);
-    if (Invalid)
-      return StrTokSpellingLoc;
-      
-    const char *StrData = Buffer.data()+LocInfo.second;
-
-    // Create a langops struct and enable trigraphs.  This is sufficient for
-    // relexing tokens.
-    LangOptions LangOpts;
-    LangOpts.Trigraphs = true;
-
-    // Create a lexer starting at the beginning of this token.
-    Lexer TheLexer(StrTokSpellingLoc, LangOpts, Buffer.begin(), StrData,
-                   Buffer.end());
-    Token TheTok;
-    TheLexer.LexFromRawLexer(TheTok);
-
-    // Use the StringLiteralParser to compute the length of the string in bytes.
-    StringLiteralParser SLP(&TheTok, 1, PP.getSourceManager(),
-                            PP.getLangOptions(), PP.getTargetInfo());
-    unsigned TokNumBytes = SLP.GetStringLength();
-
-    // If the byte is in this token, return the location of the byte.
-    if (ByteNo < TokNumBytes ||
-        (ByteNo == TokNumBytes && TokNo == SL->getNumConcatenated())) {
-      unsigned Offset = SLP.getOffsetOfStringByte(TheTok, ByteNo); 
-
-      // Now that we know the offset of the token in the spelling, use the
-      // preprocessor to get the offset in the original source.
-      return PP.AdvanceToTokenCharacter(StrTokLoc, Offset);
-    }
-
-    // Move to the next string token.
-    ++TokNo;
-    ByteNo -= TokNumBytes;
-  }
+  return SL->getLocationOfByte(ByteNo, PP.getSourceManager(),
+                               PP.getLangOptions(), PP.getTargetInfo());
 }
+  

 /// CheckablePrintfAttr - does a function call have a "printf" attribute
 /// and arguments that merit checking?