Add support for C++0x raw string literals.

llvm-svn: 137298
2011-08-11 04:06:15 +00:00 · 2011-08-11 04:06:15 +00:00 · 54edccafc5
parent dbd1352c80
commit 54edccafc5
10 changed files with 395 additions and 110 deletions
--- a/clang/include/clang/Basic/DiagnosticLexKinds.td
+++ b/clang/include/clang/Basic/DiagnosticLexKinds.td
@ -55,6 +55,15 @@ def err_unterminated___pragma : Error<"missing terminating ')' character">;

 def err_conflict_marker : Error<"version control conflict marker in file">;

+def err_raw_delim_too_long : Error<
+  "raw string delimiter longer than 16 characters"
+  "; use PREFIX( )PREFIX to delimit raw string">;
+def err_invalid_char_raw_delim : Error<
+  "invalid character '%0' in raw string delimiter"
+  "; use PREFIX( )PREFIX to delimit raw string">;
+def err_unterminated_raw_string : Error<
+  "raw string missing terminating delimiter )%0\"">;
+
 def ext_multichar_character_literal : ExtWarn<
  "multi-character character constant">, InGroup<MultiChar>;
 def ext_four_char_character_literal : Extension<
--- a/clang/include/clang/Lex/Lexer.h
+++ b/clang/include/clang/Lex/Lexer.h
@ -485,6 +485,8 @@ private:
  void LexNumericConstant    (Token &Result, const char *CurPtr);
  void LexStringLiteral      (Token &Result, const char *CurPtr,
                              tok::TokenKind Kind);
+  void LexRawStringLiteral   (Token &Result, const char *CurPtr,
+                              tok::TokenKind Kind);
  void LexAngledStringLiteral(Token &Result, const char *CurPtr);
  void LexCharConstant       (Token &Result, const char *CurPtr,
                              tok::TokenKind Kind);
--- a/clang/include/clang/Lex/LiteralSupport.h
+++ b/clang/include/clang/Lex/LiteralSupport.h
@ -197,6 +197,7 @@ public:

 private:
  void init(const Token *StringToks, unsigned NumStringToks);
+  void CopyStringFragment(const StringRef &Fragment);
 };

 }  // end namespace clang
--- a/clang/lib/Lex/Lexer.cpp
+++ b/clang/lib/Lex/Lexer.cpp
@ -33,6 +33,7 @@
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include <cctype>
+#include <cstring>
 using namespace clang;

 static void InitCharacterInfo();
@ -760,7 +761,8 @@ enum {
  CHAR_LETTER   = 0x04,  // a-z,A-Z
  CHAR_NUMBER   = 0x08,  // 0-9
  CHAR_UNDER    = 0x10,  // _
-  CHAR_PERIOD   = 0x20   // .
+  CHAR_PERIOD   = 0x20,  // .
+  CHAR_RAWDEL   = 0x40   // {}[]#<>%:;?*+-/^&|~!=,"'
 };

 // Statically initialize CharInfo table based on ASCII character set
@ -785,20 +787,20 @@ static const unsigned char CharInfo[256] =
   0           , 0           , 0           , 0           ,
 //32 SP         33  !         34  "         35  #
 //36  $         37  %         38  &         39  '
-   CHAR_HORZ_WS, 0           , 0           , 0           ,
-   0           , 0           , 0           , 0           ,
+   CHAR_HORZ_WS, CHAR_RAWDEL , CHAR_RAWDEL , CHAR_RAWDEL ,
+   0           , CHAR_RAWDEL , CHAR_RAWDEL , CHAR_RAWDEL ,
 //40  (         41  )         42  *         43  +
 //44  ,         45  -         46  .         47  /
-   0           , 0           , 0           , 0           ,
-   0           , 0           , CHAR_PERIOD , 0           ,
+   0           , 0           , CHAR_RAWDEL , CHAR_RAWDEL ,
+   CHAR_RAWDEL , CHAR_RAWDEL , CHAR_PERIOD , CHAR_RAWDEL ,
 //48  0         49  1         50  2         51  3
 //52  4         53  5         54  6         55  7
   CHAR_NUMBER , CHAR_NUMBER , CHAR_NUMBER , CHAR_NUMBER ,
   CHAR_NUMBER , CHAR_NUMBER , CHAR_NUMBER , CHAR_NUMBER ,
 //56  8         57  9         58  :         59  ;
 //60  <         61  =         62  >         63  ?
-   CHAR_NUMBER , CHAR_NUMBER , 0           , 0           ,
-   0           , 0           , 0           , 0           ,
+   CHAR_NUMBER , CHAR_NUMBER , CHAR_RAWDEL , CHAR_RAWDEL ,
+   CHAR_RAWDEL , CHAR_RAWDEL , CHAR_RAWDEL , CHAR_RAWDEL ,
 //64  @         65  A         66  B         67  C
 //68  D         69  E         70  F         71  G
   0           , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER ,
@ -813,8 +815,8 @@ static const unsigned char CharInfo[256] =
   CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER ,
 //88  X         89  Y         90  Z         91  [
 //92  \         93  ]         94  ^         95  _
-   CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , 0           ,
-   0           , 0           , 0           , CHAR_UNDER  ,
+   CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_RAWDEL ,
+   0           , CHAR_RAWDEL , CHAR_RAWDEL , CHAR_UNDER  ,
 //96  `         97  a         98  b         99  c
 //100  d       101  e        102  f        103  g
   0           , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER ,
@ -829,8 +831,8 @@ static const unsigned char CharInfo[256] =
   CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER ,
 //120  x       121  y        122  z        123  {
 //124  |       125  }        126  ~        127 DEL
-   CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , 0           ,
-   0           , 0           , 0           , 0
+   CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_RAWDEL ,
+   CHAR_RAWDEL , CHAR_RAWDEL , CHAR_RAWDEL , 0
 };

 static void InitCharacterInfo() {
@ -888,6 +890,14 @@ static inline bool isNumberBody(unsigned char c) {
    true : false;
 }

+/// isRawStringDelimBody - Return true if c may appear in a raw string
+/// delimiter: a letter, digit, '_', '.', or a CHAR_RAWDEL punctuator.
+static inline bool isRawStringDelimBody(unsigned char c) {
+  return (CharInfo[c] &
+          (CHAR_LETTER|CHAR_NUMBER|CHAR_UNDER|CHAR_PERIOD|CHAR_RAWDEL)) ?
+    true : false;
+}
+

 //===----------------------------------------------------------------------===//
 // Diagnostics forwarding code.
@ -1363,6 +1373,78 @@ void Lexer::LexStringLiteral(Token &Result, const char *CurPtr,
  Result.setLiteralData(TokStart);
 }

+/// LexRawStringLiteral - Lex the rest of a raw string literal (after R", LR",
+/// u8R", uR", or UR"), forming a Kind token, or tok::unknown on a lexing error.
+void Lexer::LexRawStringLiteral(Token &Result, const char *CurPtr,
+                                tok::TokenKind Kind) {
+  // This function doesn't use getAndAdvanceChar because C++0x [lex.pptoken]p3:
+  //  Between the initial and final double quote characters of the raw string,
+  //  any transformations performed in phases 1 and 2 (trigraphs,
+  //  universal-character-names, and line splicing) are reverted.
+
+  unsigned PrefixLen = 0;
+  // Scan the delimiter, accepting at most 16 characters.
+  while (PrefixLen != 16 && isRawStringDelimBody(CurPtr[PrefixLen]))
+    ++PrefixLen;
+
+  // If the delimiter is not followed by '(', we didn't lex a valid delimiter.
+  if (CurPtr[PrefixLen] != '(') {
+    if (!isLexingRawMode()) {
+      const char *PrefixEnd = &CurPtr[PrefixLen];
+      if (PrefixLen == 16) {
+        Diag(PrefixEnd, diag::err_raw_delim_too_long);
+      } else {
+        Diag(PrefixEnd, diag::err_invalid_char_raw_delim)
+          << StringRef(PrefixEnd, 1);
+      }
+    }
+
+    // Search for the next '"' in hopes of salvaging the lexer. Unfortunately,
+    // it's possible the '"' was intended to be part of the raw string, but
+    // there's not much we can do about that.
+    while (1) {
+      char C = *CurPtr++;
+
+      if (C == '"')
+        break;
+      if (C == 0 && CurPtr-1 == BufferEnd) { // End of file.
+        --CurPtr; // back up so the token ends at BufferEnd
+        break;
+      }
+    }
+
+    FormTokenWithChars(Result, CurPtr, tok::unknown);
+    return;
+  }
+
+  // Save prefix and move CurPtr past it
+  const char *Prefix = CurPtr;
+  CurPtr += PrefixLen + 1; // skip over prefix and '('
+  // Scan the body up to a ')' followed by the same delimiter and a '"'.
+  while (1) {
+    char C = *CurPtr++;
+
+    if (C == ')') {
+      // Check for prefix match and closing quote.
+      if (strncmp(CurPtr, Prefix, PrefixLen) == 0 && CurPtr[PrefixLen] == '"') {
+        CurPtr += PrefixLen + 1; // skip over prefix and '"'
+        break;
+      }
+    } else if (C == 0 && CurPtr-1 == BufferEnd) { // End of file.
+      if (!isLexingRawMode())
+        Diag(BufferPtr, diag::err_unterminated_raw_string)
+          << StringRef(Prefix, PrefixLen);
+      FormTokenWithChars(Result, CurPtr-1, tok::unknown);
+      return;
+    }
+  }
+
+  // Update the location of token as well as BufferPtr.
+  const char *TokStart = BufferPtr;
+  FormTokenWithChars(Result, CurPtr, Kind);
+  Result.setLiteralData(TokStart);
+}
+
 /// LexAngledStringLiteral - Lex the remainder of an angled string literal,
 /// after having lexed the '<' character.  This is used for #include filenames.
 void Lexer::LexAngledStringLiteral(Token &Result, const char *CurPtr) {
@ -2262,12 +2344,36 @@ LexNextToken:
        return LexCharConstant(Result, ConsumeChar(CurPtr, SizeTmp, Result),
                               tok::utf16_char_constant);

+      // UTF-16 raw string literal
+      if (Char == 'R' && getCharAndSize(CurPtr + SizeTmp, SizeTmp2) == '"')
+        return LexRawStringLiteral(Result,
+                               ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
+                                           SizeTmp2, Result),
+                               tok::utf16_string_literal);
+
+      if (Char == '8') {
+        char Char2 = getCharAndSize(CurPtr + SizeTmp, SizeTmp2);
+
        // UTF-8 string literal
-      if (Char == '8' && getCharAndSize(CurPtr + SizeTmp, SizeTmp2) == '"')
+        if (Char2 == '"')
          return LexStringLiteral(Result,
                               ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
                                           SizeTmp2, Result),
                               tok::utf8_string_literal);
+
+        if (Char2 == 'R') {
+          unsigned SizeTmp3;
+          char Char3 = getCharAndSize(CurPtr + SizeTmp + SizeTmp2, SizeTmp3);
+          // UTF-8 raw string literal
+          if (Char3 == '"') {
+            return LexRawStringLiteral(Result,
+                   ConsumeChar(ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
+                                           SizeTmp2, Result),
+                               SizeTmp3, Result),
+                   tok::utf8_string_literal);
+          }
+        }
+      }
    }

    // treat u like the start of an identifier.
@ -2289,11 +2395,34 @@ LexNextToken:
      if (Char == '\'')
        return LexCharConstant(Result, ConsumeChar(CurPtr, SizeTmp, Result),
                               tok::utf32_char_constant);
+
+      // UTF-32 raw string literal
+      if (Char == 'R' && getCharAndSize(CurPtr + SizeTmp, SizeTmp2) == '"')
+        return LexRawStringLiteral(Result,
+                               ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
+                                           SizeTmp2, Result),
+                               tok::utf32_string_literal);
    }

    // treat U like the start of an identifier.
    return LexIdentifier(Result, CurPtr);

+  case 'R': // Identifier or C++0x raw string literal
+    // Notify MIOpt that we read a non-whitespace/non-comment token.
+    MIOpt.ReadToken();
+
+    if (Features.CPlusPlus0x) {
+      Char = getCharAndSize(CurPtr, SizeTmp);
+
+      if (Char == '"')
+        return LexRawStringLiteral(Result,
+                                   ConsumeChar(CurPtr, SizeTmp, Result),
+                                   tok::string_literal);
+    }
+
+    // treat R like the start of an identifier.
+    return LexIdentifier(Result, CurPtr);
+
  case 'L':   // Identifier (Loony) or wide literal (L'x' or L"xyz").
    // Notify MIOpt that we read a non-whitespace/non-comment token.
    MIOpt.ReadToken();
@ -2304,6 +2433,14 @@ LexNextToken:
      return LexStringLiteral(Result, ConsumeChar(CurPtr, SizeTmp, Result),
                              tok::wide_string_literal);

+    // Wide raw string literal.
+    if (Features.CPlusPlus0x && Char == 'R' &&
+        getCharAndSize(CurPtr + SizeTmp, SizeTmp2) == '"')
+      return LexRawStringLiteral(Result,
+                               ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
+                                           SizeTmp2, Result),
+                               tok::wide_string_literal);
+
    // Wide character constant.
    if (Char == '\'')
      return LexCharConstant(Result, ConsumeChar(CurPtr, SizeTmp, Result),
@ -2313,7 +2450,7 @@ LexNextToken:
  // C99 6.4.2: Identifiers.
  case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
  case 'H': case 'I': case 'J': case 'K':    /*'L'*/case 'M': case 'N':
-  case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T':    /*'U'*/
+  case 'O': case 'P': case 'Q':    /*'R'*/case 'S': case 'T':    /*'U'*/
  case 'V': case 'W': case 'X': case 'Y': case 'Z':
  case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
  case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
--- a/clang/lib/Lex/LiteralSupport.cpp
+++ b/clang/lib/Lex/LiteralSupport.cpp
@ -713,6 +713,38 @@ NumericLiteralParser::GetFloatValue(llvm::APFloat &Result) {
 }


+///       character-literal: [C++0x lex.ccon]
+///         ' c-char-sequence '
+///         u' c-char-sequence '
+///         U' c-char-sequence '
+///         L' c-char-sequence '
+///       c-char-sequence:
+///         c-char
+///         c-char-sequence c-char
+///       c-char:
+///         any member of the source character set except the single-quote ',
+///           backslash \, or new-line character
+///         escape-sequence
+///         universal-character-name
+///       escape-sequence: [C++0x lex.ccon]
+///         simple-escape-sequence
+///         octal-escape-sequence
+///         hexadecimal-escape-sequence
+///       simple-escape-sequence:
+///         one of \' \" \? \\ \a \b \f \n \r \t \v
+///       octal-escape-sequence:
+///         \ octal-digit
+///         \ octal-digit octal-digit
+///         \ octal-digit octal-digit octal-digit
+///       hexadecimal-escape-sequence:
+///         \x hexadecimal-digit
+///         hexadecimal-escape-sequence hexadecimal-digit
+///       universal-character-name:
+///         \u hex-quad
+///         \U hex-quad hex-quad
+///       hex-quad:
+///         hex-digit hex-digit hex-digit hex-digit
+///
 CharLiteralParser::CharLiteralParser(const char *begin, const char *end,
                                     SourceLocation Loc, Preprocessor &PP,
                                     tok::TokenKind kind) {
@ -825,34 +857,52 @@ CharLiteralParser::CharLiteralParser(const char *begin, const char *end,
 }


-///       string-literal: [C99 6.4.5]
-///          " [s-char-sequence] "
-///         L" [s-char-sequence] "
+///       string-literal: [C++0x lex.string]
+///         encoding-prefix " [s-char-sequence] "
+///         encoding-prefix R raw-string
+///       encoding-prefix:
+///         u8
+///         u
+///         U
+///         L
 ///       s-char-sequence:
 ///         s-char
 ///         s-char-sequence s-char
 ///       s-char:
-///         any source character except the double quote ",
-///           backslash \, or newline character
-///         escape-character
+///         any member of the source character set except the double-quote ",
+///           backslash \, or new-line character
+///         escape-sequence
 ///         universal-character-name
-///       escape-character: [C99 6.4.4.4]
-///         \ escape-code
-///         universal-character-name
-///       escape-code:
-///         character-escape-code
-///         octal-escape-code
-///         hex-escape-code
-///       character-escape-code: one of
-///         n t b r f v a
-///         \ ' " ?
-///       octal-escape-code:
-///         octal-digit
-///         octal-digit octal-digit
-///         octal-digit octal-digit octal-digit
-///       hex-escape-code:
-///         x hex-digit
-///         hex-escape-code hex-digit
+///       raw-string:
+///         " d-char-sequence ( r-char-sequence ) d-char-sequence "
+///       r-char-sequence:
+///         r-char
+///         r-char-sequence r-char
+///       r-char:
+///         any member of the source character set, except a right parenthesis )
+///           followed by the initial d-char-sequence (which may be empty)
+///           followed by a double quote ".
+///       d-char-sequence:
+///         d-char
+///         d-char-sequence d-char
+///       d-char:
+///         any member of the basic source character set except:
+///           space, the left parenthesis (, the right parenthesis ),
+///           the backslash \, and the control characters representing horizontal
+///           tab, vertical tab, form feed, and newline.
+///       escape-sequence: [C++0x lex.ccon]
+///         simple-escape-sequence
+///         octal-escape-sequence
+///         hexadecimal-escape-sequence
+///       simple-escape-sequence:
+///         one of \' \" \? \\ \a \b \f \n \r \t \v
+///       octal-escape-sequence:
+///         \ octal-digit
+///         \ octal-digit octal-digit
+///         \ octal-digit octal-digit octal-digit
+///       hexadecimal-escape-sequence:
+///         \x hexadecimal-digit
+///         hexadecimal-escape-sequence hexadecimal-digit
 ///       universal-character-name:
 ///         \u hex-quad
 ///         \U hex-quad hex-quad
@ -972,8 +1022,24 @@ void StringLiteralParser::init(const Token *StringToks, unsigned NumStringToks){
        ++ThisTokBuf;
    }

-    assert(ThisTokBuf[0] == '"' && "Expected quote, lexer broken?");
+    // Check for raw string
+    if (ThisTokBuf[0] == 'R') {
+      ThisTokBuf += 2; // skip R"
+
+      const char *Prefix = ThisTokBuf;
+      while (ThisTokBuf[0] != '(')
        ++ThisTokBuf;
+      ++ThisTokBuf; // skip '('
+
+      // remove same number of characters from the end
+      if (ThisTokEnd >= ThisTokBuf + (ThisTokBuf - Prefix))
+        ThisTokEnd -= (ThisTokBuf - Prefix);
+
+      // Copy the string over
+      CopyStringFragment(StringRef(ThisTokBuf, ThisTokEnd - ThisTokBuf));
+    } else {
+      assert(ThisTokBuf[0] == '"' && "Expected quote, lexer broken?");
+      ++ThisTokBuf; // skip "

      // Check if this is a pascal string
      if (Features.PascalStrings && ThisTokBuf + 1 != ThisTokEnd &&
@ -997,19 +1063,7 @@ void StringLiteralParser::init(const Token *StringToks, unsigned NumStringToks){
          } while (ThisTokBuf != ThisTokEnd && ThisTokBuf[0] != '\\');

          // Copy the character span over.
-        unsigned Len = ThisTokBuf-InStart;
-        if (CharByteWidth == 1) {
-          memcpy(ResultPtr, InStart, Len);
-          ResultPtr += Len;
-        } else {
-          // Note: our internal rep of wide char tokens is always little-endian.
-          for (; Len; --Len, ++InStart) {
-            *ResultPtr++ = InStart[0];
-            // Add zeros at the end.
-            for (unsigned i = 1, e = CharByteWidth; i != e; ++i)
-              *ResultPtr++ = 0;
-          }
-        }
+          CopyStringFragment(StringRef(InStart, ThisTokBuf - InStart));
          continue;
        }
        // Is this a Universal Character Name escape?
@ -1032,6 +1086,7 @@ void StringLiteralParser::init(const Token *StringToks, unsigned NumStringToks){
          *ResultPtr++ = ResultChar >> i*8;
      }
    }
+  }

  if (Pascal) {
    ResultBuf[0] = ResultPtr-&ResultBuf[0]-1;
@ -1062,6 +1117,25 @@ void StringLiteralParser::init(const Token *StringToks, unsigned NumStringToks){
 }


+/// CopyStringFragment - This function copies Fragment into ResultPtr,
+/// widening each character to CharByteWidth bytes when CharByteWidth != 1.
+void StringLiteralParser::CopyStringFragment(const StringRef &Fragment) {
+  // Copy the character span over.
+  if (CharByteWidth == 1) {
+    memcpy(ResultPtr, Fragment.data(), Fragment.size());
+    ResultPtr += Fragment.size();
+  } else {
+    // Note: our internal rep of wide char tokens is always little-endian.
+    for (StringRef::iterator I=Fragment.begin(), E=Fragment.end(); I!=E; ++I) {
+      *ResultPtr++ = *I;
+      // Add zeros at the end.
+      for (unsigned i = 1, e = CharByteWidth; i != e; ++i)
+        *ResultPtr++ = 0;
+    }
+  }
+}
+
+
 /// getOffsetOfStringByte - This function returns the offset of the
 /// specified byte of the string data represented by Token.  This handles
 /// advancing over escape sequences in the string.
--- a/clang/lib/Lex/TokenConcatenation.cpp
+++ b/clang/lib/Lex/TokenConcatenation.cpp
@ -17,39 +17,53 @@
 using namespace clang;


+/// IsStringPrefix - Return true if Str is a string literal prefix: 'L', or
+/// (only when CPlusPlus0x) 'u', 'U', 'u8', 'R', 'LR', 'uR', 'UR', or 'u8R'.
+static bool IsStringPrefix(const StringRef &Str, bool CPlusPlus0x) {
+
+  if (Str[0] == 'L' ||
+      (CPlusPlus0x && (Str[0] == 'u' || Str[0] == 'U' || Str[0] == 'R'))) {
+
+    if (Str.size() == 1)
+      return true; // "L", "u", "U", and "R"
+
+    // Check for raw flavors. Need to make sure the first character wasn't
+    // already R. Need CPlusPlus0x check for "LR".
+    if (Str[1] == 'R' && Str[0] != 'R' && Str.size() == 2 && CPlusPlus0x)
+      return true; // "LR", "uR", "UR"
+
+    // Check for "u8" and "u8R"
+    if (Str[0] == 'u' && Str[1] == '8') {
+      if (Str.size() == 2) return true; // "u8"
+      if (Str.size() == 3 && Str[2] == 'R') return true; // "u8R"
+    }
+  }
+
+  return false;
+}
+
 /// IsIdentifierStringPrefix - Return true if the spelling of the token
-/// is literally 'L', 'u', 'U', or 'u8'.
+/// is literally 'L', 'u', 'U', or 'u8'. Including raw versions.
 bool TokenConcatenation::IsIdentifierStringPrefix(const Token &Tok) const {
  const LangOptions &LangOpts = PP.getLangOptions();

  if (!Tok.needsCleaning()) {
-    if (Tok.getLength() != 1 && Tok.getLength() != 2)
+    if (Tok.getLength() < 1 || Tok.getLength() > 3)
      return false;
    SourceManager &SM = PP.getSourceManager();
    const char *Ptr = SM.getCharacterData(SM.getSpellingLoc(Tok.getLocation()));
-    if (Tok.getLength() == 1)
-      return Ptr[0] == 'L' ||
-             (LangOpts.CPlusPlus0x && (Ptr[0] == 'u' || Ptr[0] == 'U'));
-    if (Tok.getLength() == 2)
-      return LangOpts.CPlusPlus0x && Ptr[0] == 'u' && Ptr[1] == '8';
+    return IsStringPrefix(StringRef(Ptr, Tok.getLength()),
+                          LangOpts.CPlusPlus0x);
  }

  if (Tok.getLength() < 256) {
    char Buffer[256];
    const char *TokPtr = Buffer;
    unsigned length = PP.getSpelling(Tok, TokPtr);
-    if (length == 1)
-      return TokPtr[0] == 'L' ||
-             (LangOpts.CPlusPlus0x && (TokPtr[0] == 'u' || TokPtr[0] == 'U'));
-    if (length == 2)
-      return LangOpts.CPlusPlus0x && TokPtr[0] == 'u' && TokPtr[1] == '8';
-    return false;
+    return IsStringPrefix(StringRef(TokPtr, length), LangOpts.CPlusPlus0x);
  }

-  std::string TokStr = PP.getSpelling(Tok);
-  return TokStr == "L" || (LangOpts.CPlusPlus0x && (TokStr == "u8" ||
-                                                    TokStr == "u" ||
-                                                    TokStr == "U"));
+  return IsStringPrefix(StringRef(PP.getSpelling(Tok)), LangOpts.CPlusPlus0x);
 }

 TokenConcatenation::TokenConcatenation(Preprocessor &pp) : PP(pp) {
--- a/clang/test/CodeGen/string-literal.c
+++ b/clang/test/CodeGen/string-literal.c
@ -1,6 +1,6 @@
 // RUN: %clang_cc1 -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck -check-prefix=C %s
 // RUN: %clang_cc1 -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck -check-prefix=C %s
-// RUN: %clang_cc1 -x c++ -std=c++0x -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck -check-prefix=C %s
+// RUN: %clang_cc1 -x c++ -std=c++0x -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck -check-prefix=CPP0X %s

 #include <stddef.h>

@ -38,5 +38,28 @@ int main() {

  // CHECK-CPP0X: private unnamed_addr constant [4 x i8] c"def\00", align 1
  const char *g = u8"def";
+
+  // CHECK-CPP0X: private unnamed_addr constant [4 x i8] c"ghi\00", align 1
+  const char *h = R"foo(ghi)foo";
+
+  // CHECK-CPP0X: private unnamed_addr constant [4 x i8] c"jkl\00", align 1
+  const char *i = u8R"bar(jkl)bar";
+
+  // CHECK-CPP0X: private unnamed_addr constant [6 x i8] c"G\00H\00\00\00", align 2
+  const char16_t *j = uR"foo(GH)foo";
+
+  // CHECK-CPP0X: private unnamed_addr constant [12 x i8] c"I\00\00\00J\00\00\00\00\00\00\00", align 4
+  const char32_t *k = UR"bar(IJ)bar";
+
+  // CHECK-CPP0X: private unnamed_addr constant [12 x i8] c"K\00\00\00L\00\00\00\00\00\00\00", align 4
+  const wchar_t *l = LR"bar(KL)bar";
+
+  // CHECK-CPP0X: private unnamed_addr constant [9 x i8] c"abc\5Cndef\00", align 1
+  const char *m = R"(abc\ndef)";
+
+  // CHECK-CPP0X: private unnamed_addr constant [8 x i8] c"abc\0Adef\00", align 1
+  const char *n = R"(abc
+def)";
+
 #endif
 }
--- a/clang/test/Lexer/cxx0x_raw_string_delim_length.cpp
+++ b/clang/test/Lexer/cxx0x_raw_string_delim_length.cpp
@ -0,0 +1,6 @@
+// RUN: %clang_cc1 -std=c++0x -E %s 2>&1 | grep 'error: raw string delimiter longer than 16 characters'
+
+const char *str = R"abcdefghijkmnopqrstuvwxyz(abcdef)abcdefghijkmnopqrstuvwxyz";
+// RUN: %clang_cc1 -std=c++0x -E %s 2>&1 | grep 'error: raw string delimiter longer than 16 characters'
+
+const char *str = R"abcdefghijkmnopqrstuvwxyz(abcdef)abcdefghijkmnopqrstuvwxyz";
--- a/clang/test/Lexer/cxx0x_raw_string_unterminated.cpp
+++ b/clang/test/Lexer/cxx0x_raw_string_unterminated.cpp
@ -0,0 +1,8 @@
+// RUN: %clang_cc1 -std=c++0x -E %s 2>&1 | grep 'error: raw string missing terminating delimiter )foo"'
+
+const char *str = R"foo(abc
+def)bar";
+// RUN: %clang_cc1 -std=c++0x -E %s 2>&1 | grep 'error: raw string missing terminating delimiter )foo"'
+
+const char *str = R"foo(abc
+def)bar";
--- a/clang/test/SemaCXX/cxx0x-type-convert-construct.cpp
+++ b/clang/test/SemaCXX/cxx0x-type-convert-construct.cpp
@ -7,4 +7,15 @@ void f() {
  ustr = u"a UTF-16 string"; // expected-error {{assigning to 'char16_t *' from incompatible type 'const char16_t [16]'}}
  char32_t *Ustr;
  Ustr = U"a UTF-32 string"; // expected-error {{assigning to 'char32_t *' from incompatible type 'const char32_t [16]'}}
+
+  char *Rstr;
+  Rstr = "a raw string"; // expected-warning{{conversion from string literal to 'char *' is deprecated}}
+  wchar_t *LRstr;
+  LRstr = LR"foo(a wide raw string)foo"; // expected-warning{{conversion from string literal to 'wchar_t *' is deprecated}}
+  char *u8Rstr;
+  u8Rstr = u8R"foo(a UTF-8 raw string)foo"; // expected-error {{assigning to 'char *' from incompatible type 'const char [19]'}}
+  char16_t *uRstr;
+  uRstr = uR"foo(a UTF-16 raw string)foo"; // expected-error {{assigning to 'char16_t *' from incompatible type 'const char16_t [20]'}}
+  char32_t *URstr;
+  URstr = UR"foo(a UTF-32 raw string)foo"; // expected-error {{assigning to 'char32_t *' from incompatible type 'const char32_t [20]'}}
 }