Add support for C++0x raw string literals.

llvm-svn: 137298
This commit is contained in:
Craig Topper 2011-08-11 04:06:15 +00:00
parent dbd1352c80
commit 54edccafc5
10 changed files with 395 additions and 110 deletions

View File

@ -55,6 +55,15 @@ def err_unterminated___pragma : Error<"missing terminating ')' character">;
def err_conflict_marker : Error<"version control conflict marker in file">;
// Reported by the lexer when a raw string delimiter reaches the 16-character
// limit without being followed by '('.
def err_raw_delim_too_long : Error<
"raw string delimiter longer than 16 characters"
"; use PREFIX( )PREFIX to delimit raw string">;
// Reported by the lexer when the character that terminates the raw string
// delimiter is not '('.  %0 is that offending character.
def err_invalid_char_raw_delim : Error<
"invalid character '%0' in raw string delimiter"
"; use PREFIX( )PREFIX to delimit raw string">;
// Reported when end of file is reached inside a raw string.  %0 is the
// delimiter that was expected between the closing ')' and '"'.
def err_unterminated_raw_string : Error<
"raw string missing terminating delimiter )%0\"">;
def ext_multichar_character_literal : ExtWarn<
"multi-character character constant">, InGroup<MultiChar>;
def ext_four_char_character_literal : Extension<

View File

@ -485,6 +485,8 @@ private:
void LexNumericConstant (Token &Result, const char *CurPtr);
void LexStringLiteral (Token &Result, const char *CurPtr,
tok::TokenKind Kind);
void LexRawStringLiteral (Token &Result, const char *CurPtr,
tok::TokenKind Kind);
void LexAngledStringLiteral(Token &Result, const char *CurPtr);
void LexCharConstant (Token &Result, const char *CurPtr,
tok::TokenKind Kind);

View File

@ -197,6 +197,7 @@ public:
private:
void init(const Token *StringToks, unsigned NumStringToks);
void CopyStringFragment(const StringRef &Fragment);
};
} // end namespace clang

View File

@ -33,6 +33,7 @@
#include "llvm/Support/Compiler.h"
#include "llvm/Support/MemoryBuffer.h"
#include <cctype>
#include <cstring>
using namespace clang;
static void InitCharacterInfo();
@ -760,7 +761,8 @@ enum {
CHAR_LETTER = 0x04, // a-z,A-Z
CHAR_NUMBER = 0x08, // 0-9
CHAR_UNDER = 0x10, // _
CHAR_PERIOD = 0x20 // .
CHAR_PERIOD = 0x20, // .
CHAR_RAWDEL = 0x40 // {}[]#<>%:;?*+-/^&|~!=,"'
};
// Statically initialize CharInfo table based on ASCII character set
@ -785,20 +787,20 @@ static const unsigned char CharInfo[256] =
0 , 0 , 0 , 0 ,
//32 SP 33 ! 34 " 35 #
//36 $ 37 % 38 & 39 '
CHAR_HORZ_WS, 0 , 0 , 0 ,
0 , 0 , 0 , 0 ,
CHAR_HORZ_WS, CHAR_RAWDEL , CHAR_RAWDEL , CHAR_RAWDEL ,
0 , CHAR_RAWDEL , CHAR_RAWDEL , CHAR_RAWDEL ,
//40 ( 41 ) 42 * 43 +
//44 , 45 - 46 . 47 /
0 , 0 , 0 , 0 ,
0 , 0 , CHAR_PERIOD , 0 ,
0 , 0 , CHAR_RAWDEL , CHAR_RAWDEL ,
CHAR_RAWDEL , CHAR_RAWDEL , CHAR_PERIOD , CHAR_RAWDEL ,
//48 0 49 1 50 2 51 3
//52 4 53 5 54 6 55 7
CHAR_NUMBER , CHAR_NUMBER , CHAR_NUMBER , CHAR_NUMBER ,
CHAR_NUMBER , CHAR_NUMBER , CHAR_NUMBER , CHAR_NUMBER ,
//56 8 57 9 58 : 59 ;
//60 < 61 = 62 > 63 ?
CHAR_NUMBER , CHAR_NUMBER , 0 , 0 ,
0 , 0 , 0 , 0 ,
CHAR_NUMBER , CHAR_NUMBER , CHAR_RAWDEL , CHAR_RAWDEL ,
CHAR_RAWDEL , CHAR_RAWDEL , CHAR_RAWDEL , CHAR_RAWDEL ,
//64 @ 65 A 66 B 67 C
//68 D 69 E 70 F 71 G
0 , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER ,
@ -813,8 +815,8 @@ static const unsigned char CharInfo[256] =
CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER ,
//88 X 89 Y 90 Z 91 [
//92 \ 93 ] 94 ^ 95 _
CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , 0 ,
0 , 0 , 0 , CHAR_UNDER ,
CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_RAWDEL ,
0 , CHAR_RAWDEL , CHAR_RAWDEL , CHAR_UNDER ,
//96 ` 97 a 98 b 99 c
//100 d 101 e 102 f 103 g
0 , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER ,
@ -828,9 +830,9 @@ static const unsigned char CharInfo[256] =
CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER ,
CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER ,
//120 x 121 y 122 z 123 {
//124 | 125 } 126 ~ 127 DEL
CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , 0 ,
0 , 0 , 0 , 0
//124 | 125 } 126 ~ 127 DEL
CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_RAWDEL ,
CHAR_RAWDEL , CHAR_RAWDEL , CHAR_RAWDEL , 0
};
static void InitCharacterInfo() {
@ -888,6 +890,14 @@ static inline bool isNumberBody(unsigned char c) {
true : false;
}
/// isRawStringDelimBody - Report whether the given character may appear in the
/// delimiter of a raw string literal, i.e. whether CharInfo classifies it as a
/// letter, digit, underscore, period, or one of the CHAR_RAWDEL punctuators.
static inline bool isRawStringDelimBody(unsigned char c) {
  const unsigned DelimClasses =
      CHAR_LETTER | CHAR_NUMBER | CHAR_UNDER | CHAR_PERIOD | CHAR_RAWDEL;
  return (CharInfo[c] & DelimClasses) != 0;
}
//===----------------------------------------------------------------------===//
// Diagnostics forwarding code.
@ -1363,6 +1373,78 @@ void Lexer::LexStringLiteral(Token &Result, const char *CurPtr,
Result.setLiteralData(TokStart);
}
/// LexRawStringLiteral - Lex the rest of a raw string literal.  The opening
/// R", LR", u8R", uR", or UR" has already been consumed, so CurPtr points at
/// the first character of the delimiter.  On success the token is formed with
/// the supplied Kind; on a malformed delimiter or an unterminated literal a
/// tok::unknown token is formed instead.
void Lexer::LexRawStringLiteral(Token &Result, const char *CurPtr,
                                tok::TokenKind Kind) {
  // getAndAdvanceChar is deliberately not used here.  C++0x [lex.pptoken]p3:
  //   Between the initial and final double quote characters of the raw string,
  //   any transformations performed in phases 1 and 2 (trigraphs,
  //   universal-character-names, and line splicing) are reverted.

  // Measure the delimiter, never looking at more than 16 characters.
  unsigned DelimLen = 0;
  for (; DelimLen != 16; ++DelimLen)
    if (!isRawStringDelimBody(CurPtr[DelimLen]))
      break;

  // A well-formed delimiter is immediately followed by '('.
  if (CurPtr[DelimLen] != '(') {
    if (!isLexingRawMode()) {
      const char *DelimEnd = CurPtr + DelimLen;
      if (DelimLen == 16)
        Diag(DelimEnd, diag::err_raw_delim_too_long);
      else
        Diag(DelimEnd, diag::err_invalid_char_raw_delim)
          << StringRef(DelimEnd, 1);
    }

    // Try to resynchronize at the next '"'.  That quote may actually have been
    // meant as part of the raw string, but there is no good way to tell.
    for (;;) {
      const char Ch = *CurPtr++;
      if (Ch == '"')
        break;
      if (Ch == 0 && CurPtr - 1 == BufferEnd) {
        // Hit the end of the buffer: do not consume the terminating NUL.
        --CurPtr;
        break;
      }
    }

    FormTokenWithChars(Result, CurPtr, tok::unknown);
    return;
  }

  // Remember where the delimiter starts, then step over it and the '('.
  const char *Delim = CurPtr;
  CurPtr += DelimLen + 1;

  for (;;) {
    const char Ch = *CurPtr++;

    if (Ch == ')') {
      // The literal ends only at ')' + delimiter + '"'.  The delimiter is
      // compared first so the quote position is only read after a full match.
      const bool Closes = strncmp(CurPtr, Delim, DelimLen) == 0 &&
                          CurPtr[DelimLen] == '"';
      if (Closes) {
        CurPtr += DelimLen + 1; // step over the delimiter and the '"'
        break;
      }
      continue;
    }

    if (Ch == 0 && CurPtr - 1 == BufferEnd) {
      // End of file inside the raw string.
      if (!isLexingRawMode())
        Diag(BufferPtr, diag::err_unterminated_raw_string)
          << StringRef(Delim, DelimLen);
      FormTokenWithChars(Result, CurPtr - 1, tok::unknown);
      return;
    }
  }

  // Capture the token start before forming the token, since forming it also
  // updates BufferPtr.
  const char *LiteralStart = BufferPtr;
  FormTokenWithChars(Result, CurPtr, Kind);
  Result.setLiteralData(LiteralStart);
}
/// LexAngledStringLiteral - Lex the remainder of an angled string literal,
/// after having lexed the '<' character. This is used for #include filenames.
void Lexer::LexAngledStringLiteral(Token &Result, const char *CurPtr) {
@ -2262,12 +2344,36 @@ LexNextToken:
return LexCharConstant(Result, ConsumeChar(CurPtr, SizeTmp, Result),
tok::utf16_char_constant);
// UTF-8 string literal
if (Char == '8' && getCharAndSize(CurPtr + SizeTmp, SizeTmp2) == '"')
return LexStringLiteral(Result,
ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
SizeTmp2, Result),
tok::utf8_string_literal);
// UTF-16 raw string literal
if (Char == 'R' && getCharAndSize(CurPtr + SizeTmp, SizeTmp2) == '"')
return LexRawStringLiteral(Result,
ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
SizeTmp2, Result),
tok::utf16_string_literal);
if (Char == '8') {
char Char2 = getCharAndSize(CurPtr + SizeTmp, SizeTmp2);
// UTF-8 string literal
if (Char2 == '"')
return LexStringLiteral(Result,
ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
SizeTmp2, Result),
tok::utf8_string_literal);
if (Char2 == 'R') {
unsigned SizeTmp3;
char Char3 = getCharAndSize(CurPtr + SizeTmp + SizeTmp2, SizeTmp3);
// UTF-8 raw string literal
if (Char3 == '"') {
return LexRawStringLiteral(Result,
ConsumeChar(ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
SizeTmp2, Result),
SizeTmp3, Result),
tok::utf8_string_literal);
}
}
}
}
// treat u like the start of an identifier.
@ -2289,11 +2395,34 @@ LexNextToken:
if (Char == '\'')
return LexCharConstant(Result, ConsumeChar(CurPtr, SizeTmp, Result),
tok::utf32_char_constant);
// UTF-32 raw string literal
if (Char == 'R' && getCharAndSize(CurPtr + SizeTmp, SizeTmp2) == '"')
return LexRawStringLiteral(Result,
ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
SizeTmp2, Result),
tok::utf32_string_literal);
}
// treat U like the start of an identifier.
return LexIdentifier(Result, CurPtr);
case 'R': // Identifier or C++0x raw string literal
// Notify MIOpt that we read a non-whitespace/non-comment token.
MIOpt.ReadToken();
if (Features.CPlusPlus0x) {
Char = getCharAndSize(CurPtr, SizeTmp);
if (Char == '"')
return LexRawStringLiteral(Result,
ConsumeChar(CurPtr, SizeTmp, Result),
tok::string_literal);
}
// treat R like the start of an identifier.
return LexIdentifier(Result, CurPtr);
case 'L': // Identifier (Loony) or wide literal (L'x' or L"xyz").
// Notify MIOpt that we read a non-whitespace/non-comment token.
MIOpt.ReadToken();
@ -2304,6 +2433,14 @@ LexNextToken:
return LexStringLiteral(Result, ConsumeChar(CurPtr, SizeTmp, Result),
tok::wide_string_literal);
// Wide raw string literal.
if (Features.CPlusPlus0x && Char == 'R' &&
getCharAndSize(CurPtr + SizeTmp, SizeTmp2) == '"')
return LexRawStringLiteral(Result,
ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
SizeTmp2, Result),
tok::wide_string_literal);
// Wide character constant.
if (Char == '\'')
return LexCharConstant(Result, ConsumeChar(CurPtr, SizeTmp, Result),
@ -2313,7 +2450,7 @@ LexNextToken:
// C99 6.4.2: Identifiers.
case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
case 'H': case 'I': case 'J': case 'K': /*'L'*/case 'M': case 'N':
case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': /*'U'*/
case 'O': case 'P': case 'Q': /*'R'*/case 'S': case 'T': /*'U'*/
case 'V': case 'W': case 'X': case 'Y': case 'Z':
case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':

View File

@ -713,6 +713,38 @@ NumericLiteralParser::GetFloatValue(llvm::APFloat &Result) {
}
/// character-literal: [C++0x lex.ccon]
/// ' c-char-sequence '
/// u' c-char-sequence '
/// U' c-char-sequence '
/// L' c-char-sequence '
/// c-char-sequence:
/// c-char
/// c-char-sequence c-char
/// c-char:
/// any member of the source character set except the single-quote ',
/// backslash \, or new-line character
/// escape-sequence
/// universal-character-name
/// escape-sequence: [C++0x lex.ccon]
/// simple-escape-sequence
/// octal-escape-sequence
/// hexadecimal-escape-sequence
/// simple-escape-sequence:
/// one of \ \" \? \\ \a \b \f \n \r \t \v
/// octal-escape-sequence:
/// \ octal-digit
/// \ octal-digit octal-digit
/// \ octal-digit octal-digit octal-digit
/// hexadecimal-escape-sequence:
/// \x hexadecimal-digit
/// hexadecimal-escape-sequence hexadecimal-digit
/// universal-character-name:
/// \u hex-quad
/// \U hex-quad hex-quad
/// hex-quad:
/// hex-digit hex-digit hex-digit hex-digit
///
CharLiteralParser::CharLiteralParser(const char *begin, const char *end,
SourceLocation Loc, Preprocessor &PP,
tok::TokenKind kind) {
@ -825,34 +857,52 @@ CharLiteralParser::CharLiteralParser(const char *begin, const char *end,
}
/// string-literal: [C99 6.4.5]
/// " [s-char-sequence] "
/// L" [s-char-sequence] "
/// string-literal: [C++0x lex.string]
/// encoding-prefix " [s-char-sequence] "
/// encoding-prefix R raw-string
/// encoding-prefix:
/// u8
/// u
/// U
/// L
/// s-char-sequence:
/// s-char
/// s-char-sequence s-char
/// s-char:
/// any source character except the double quote ",
/// backslash \, or newline character
/// escape-character
/// any member of the source character set except the double-quote ",
/// backslash \, or new-line character
/// escape-sequence
/// universal-character-name
/// escape-character: [C99 6.4.4.4]
/// \ escape-code
/// universal-character-name
/// escape-code:
/// character-escape-code
/// octal-escape-code
/// hex-escape-code
/// character-escape-code: one of
/// n t b r f v a
/// \ ' " ?
/// octal-escape-code:
/// octal-digit
/// octal-digit octal-digit
/// octal-digit octal-digit octal-digit
/// hex-escape-code:
/// x hex-digit
/// hex-escape-code hex-digit
/// raw-string:
/// " d-char-sequence ( r-char-sequence ) d-char-sequence "
/// r-char-sequence:
/// r-char
/// r-char-sequence r-char
/// r-char:
/// any member of the source character set, except a right parenthesis )
/// followed by the initial d-char-sequence (which may be empty)
/// followed by a double quote ".
/// d-char-sequence:
/// d-char
/// d-char-sequence d-char
/// d-char:
/// any member of the basic source character set except:
/// space, the left parenthesis (, the right parenthesis ),
/// the backslash \, and the control characters representing horizontal
/// tab, vertical tab, form feed, and newline.
/// escape-sequence: [C++0x lex.ccon]
/// simple-escape-sequence
/// octal-escape-sequence
/// hexadecimal-escape-sequence
/// simple-escape-sequence:
/// one of \ \" \? \\ \a \b \f \n \r \t \v
/// octal-escape-sequence:
/// \ octal-digit
/// \ octal-digit octal-digit
/// \ octal-digit octal-digit octal-digit
/// hexadecimal-escape-sequence:
/// \x hexadecimal-digit
/// hexadecimal-escape-sequence hexadecimal-digit
/// universal-character-name:
/// \u hex-quad
/// \U hex-quad hex-quad
@ -972,64 +1022,69 @@ void StringLiteralParser::init(const Token *StringToks, unsigned NumStringToks){
++ThisTokBuf;
}
assert(ThisTokBuf[0] == '"' && "Expected quote, lexer broken?");
++ThisTokBuf;
// Check for raw string
if (ThisTokBuf[0] == 'R') {
ThisTokBuf += 2; // skip R"
// Check if this is a pascal string
if (Features.PascalStrings && ThisTokBuf + 1 != ThisTokEnd &&
ThisTokBuf[0] == '\\' && ThisTokBuf[1] == 'p') {
// If the \p sequence is found in the first token, we have a pascal string
// Otherwise, if we already have a pascal string, ignore the first \p
if (i == 0) {
const char *Prefix = ThisTokBuf;
while (ThisTokBuf[0] != '(')
++ThisTokBuf;
Pascal = true;
} else if (Pascal)
ThisTokBuf += 2;
}
++ThisTokBuf; // skip '('
while (ThisTokBuf != ThisTokEnd) {
// Is this a span of non-escape characters?
if (ThisTokBuf[0] != '\\') {
const char *InStart = ThisTokBuf;
do {
// remove same number of characters from the end
if (ThisTokEnd >= ThisTokBuf + (ThisTokBuf - Prefix))
ThisTokEnd -= (ThisTokBuf - Prefix);
// Copy the string over
CopyStringFragment(StringRef(ThisTokBuf, ThisTokEnd - ThisTokBuf));
} else {
assert(ThisTokBuf[0] == '"' && "Expected quote, lexer broken?");
++ThisTokBuf; // skip "
// Check if this is a pascal string
if (Features.PascalStrings && ThisTokBuf + 1 != ThisTokEnd &&
ThisTokBuf[0] == '\\' && ThisTokBuf[1] == 'p') {
// If the \p sequence is found in the first token, we have a pascal string
// Otherwise, if we already have a pascal string, ignore the first \p
if (i == 0) {
++ThisTokBuf;
} while (ThisTokBuf != ThisTokEnd && ThisTokBuf[0] != '\\');
Pascal = true;
} else if (Pascal)
ThisTokBuf += 2;
}
// Copy the character span over.
unsigned Len = ThisTokBuf-InStart;
if (CharByteWidth == 1) {
memcpy(ResultPtr, InStart, Len);
ResultPtr += Len;
} else {
// Note: our internal rep of wide char tokens is always little-endian.
for (; Len; --Len, ++InStart) {
*ResultPtr++ = InStart[0];
// Add zeros at the end.
for (unsigned i = 1, e = CharByteWidth; i != e; ++i)
*ResultPtr++ = 0;
}
while (ThisTokBuf != ThisTokEnd) {
// Is this a span of non-escape characters?
if (ThisTokBuf[0] != '\\') {
const char *InStart = ThisTokBuf;
do {
++ThisTokBuf;
} while (ThisTokBuf != ThisTokEnd && ThisTokBuf[0] != '\\');
// Copy the character span over.
CopyStringFragment(StringRef(InStart, ThisTokBuf - InStart));
continue;
}
continue;
}
// Is this a Universal Character Name escape?
if (ThisTokBuf[1] == 'u' || ThisTokBuf[1] == 'U') {
EncodeUCNEscape(ThisTokBuf, ThisTokEnd, ResultPtr,
hadError, FullSourceLoc(StringToks[i].getLocation(),SM),
CharByteWidth, Diags, Features);
continue;
}
// Otherwise, this is a non-UCN escape character. Process it.
unsigned ResultChar =
ProcessCharEscape(ThisTokBuf, ThisTokEnd, hadError,
FullSourceLoc(StringToks[i].getLocation(), SM),
CharByteWidth*8, Diags);
// Is this a Universal Character Name escape?
if (ThisTokBuf[1] == 'u' || ThisTokBuf[1] == 'U') {
EncodeUCNEscape(ThisTokBuf, ThisTokEnd, ResultPtr,
hadError, FullSourceLoc(StringToks[i].getLocation(),SM),
CharByteWidth, Diags, Features);
continue;
}
// Otherwise, this is a non-UCN escape character. Process it.
unsigned ResultChar =
ProcessCharEscape(ThisTokBuf, ThisTokEnd, hadError,
FullSourceLoc(StringToks[i].getLocation(), SM),
CharByteWidth*8, Diags);
// Note: our internal rep of wide char tokens is always little-endian.
*ResultPtr++ = ResultChar & 0xFF;
// Note: our internal rep of wide char tokens is always little-endian.
*ResultPtr++ = ResultChar & 0xFF;
for (unsigned i = 1, e = CharByteWidth; i != e; ++i)
*ResultPtr++ = ResultChar >> i*8;
for (unsigned i = 1, e = CharByteWidth; i != e; ++i)
*ResultPtr++ = ResultChar >> i*8;
}
}
}
@ -1062,6 +1117,25 @@ void StringLiteralParser::init(const Token *StringToks, unsigned NumStringToks){
}
/// copyStringFragment - This function copies from Start to End into ResultPtr.
/// Performs widening for multi-byte characters.
void StringLiteralParser::CopyStringFragment(const StringRef &Fragment) {
// Copy the character span over.
if (CharByteWidth == 1) {
memcpy(ResultPtr, Fragment.data(), Fragment.size());
ResultPtr += Fragment.size();
} else {
// Note: our internal rep of wide char tokens is always little-endian.
for (StringRef::iterator I=Fragment.begin(), E=Fragment.end(); I!=E; ++I) {
*ResultPtr++ = *I;
// Add zeros at the end.
for (unsigned i = 1, e = CharByteWidth; i != e; ++i)
*ResultPtr++ = 0;
}
}
}
/// getOffsetOfStringByte - This function returns the offset of the
/// specified byte of the string data represented by Token. This handles
/// advancing over escape sequences in the string.

View File

@ -17,39 +17,53 @@
using namespace clang;
/// IsStringPrefix - Return true if Str spells a string literal prefix: 'L',
/// or in C++0x mode 'u', 'U', 'u8', 'R', or one of the raw flavors 'LR', 'uR',
/// 'UR', 'u8R'.  Str must not be empty.
static bool IsStringPrefix(const StringRef &Str, bool CPlusPlus0x) {
  const char First = Str[0];

  // Every prefix starts with 'L' or, in C++0x only, with 'u', 'U' or 'R'.
  const bool ValidStart =
      First == 'L' ||
      (CPlusPlus0x && (First == 'u' || First == 'U' || First == 'R'));
  if (!ValidStart)
    return false;

  switch (Str.size()) {
  case 1:
    return true; // "L", "u", "U", and "R"
  case 2:
    // Raw flavors "LR", "uR", "UR".  "RR" is not a prefix, and "LR" is only
    // one in C++0x, hence the explicit language check.
    if (Str[1] == 'R')
      return First != 'R' && CPlusPlus0x;
    return First == 'u' && Str[1] == '8'; // "u8"
  case 3:
    return First == 'u' && Str[1] == '8' && Str[2] == 'R'; // "u8R"
  default:
    return false;
  }
}
/// IsIdentifierStringPrefix - Return true if the spelling of the token
/// is literally 'L', 'u', 'U', or 'u8'.
/// is literally 'L', 'u', 'U', or 'u8'. Including raw versions.
bool TokenConcatenation::IsIdentifierStringPrefix(const Token &Tok) const {
const LangOptions &LangOpts = PP.getLangOptions();
if (!Tok.needsCleaning()) {
if (Tok.getLength() != 1 && Tok.getLength() != 2)
if (Tok.getLength() < 1 || Tok.getLength() > 3)
return false;
SourceManager &SM = PP.getSourceManager();
const char *Ptr = SM.getCharacterData(SM.getSpellingLoc(Tok.getLocation()));
if (Tok.getLength() == 1)
return Ptr[0] == 'L' ||
(LangOpts.CPlusPlus0x && (Ptr[0] == 'u' || Ptr[0] == 'U'));
if (Tok.getLength() == 2)
return LangOpts.CPlusPlus0x && Ptr[0] == 'u' && Ptr[1] == '8';
return IsStringPrefix(StringRef(Ptr, Tok.getLength()),
LangOpts.CPlusPlus0x);
}
if (Tok.getLength() < 256) {
char Buffer[256];
const char *TokPtr = Buffer;
unsigned length = PP.getSpelling(Tok, TokPtr);
if (length == 1)
return TokPtr[0] == 'L' ||
(LangOpts.CPlusPlus0x && (TokPtr[0] == 'u' || TokPtr[0] == 'U'));
if (length == 2)
return LangOpts.CPlusPlus0x && TokPtr[0] == 'u' && TokPtr[1] == '8';
return false;
return IsStringPrefix(StringRef(TokPtr, length), LangOpts.CPlusPlus0x);
}
std::string TokStr = PP.getSpelling(Tok);
return TokStr == "L" || (LangOpts.CPlusPlus0x && (TokStr == "u8" ||
TokStr == "u" ||
TokStr == "U"));
return IsStringPrefix(StringRef(PP.getSpelling(Tok)), LangOpts.CPlusPlus0x);
}
TokenConcatenation::TokenConcatenation(Preprocessor &pp) : PP(pp) {

View File

@ -1,6 +1,6 @@
// RUN: %clang_cc1 -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck -check-prefix=C %s
// RUN: %clang_cc1 -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck -check-prefix=C %s
// RUN: %clang_cc1 -x c++ -std=c++0x -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck -check-prefix=C %s
// RUN: %clang_cc1 -x c++ -std=c++0x -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck -check-prefix=CPP0X %s
#include <stddef.h>
@ -38,5 +38,28 @@ int main() {
// CHECK-CPP0X: private unnamed_addr constant [4 x i8] c"def\00", align 1
const char *g = u8"def";
// CHECK-CPP0X: private unnamed_addr constant [4 x i8] c"ghi\00", align 1
const char *h = R"foo(ghi)foo";
// CHECK-CPP0X: private unnamed_addr constant [4 x i8] c"jkl\00", align 1
const char *i = u8R"bar(jkl)bar";
// CHECK-CPP0X: private unnamed_addr constant [6 x i8] c"G\00H\00\00\00", align 2
const char16_t *j = uR"foo(GH)foo";
// CHECK-CPP0X: private unnamed_addr constant [12 x i8] c"I\00\00\00J\00\00\00\00\00\00\00", align 4
const char32_t *k = UR"bar(IJ)bar";
// CHECK-CPP0X: private unnamed_addr constant [12 x i8] c"K\00\00\00L\00\00\00\00\00\00\00", align 4
const wchar_t *l = LR"bar(KL)bar";
// CHECK-CPP0X: private unnamed_addr constant [9 x i8] c"abc\5Cndef\00", align 1
const char *m = R"(abc\ndef)";
// CHECK-CPP0X: private unnamed_addr constant [8 x i8] c"abc\0Adef\00", align 1
const char *n = R"(abc
def)";
#endif
}

View File

@ -0,0 +1,6 @@
// RUN: %clang_cc1 -std=c++0x -E %s 2>&1 | grep 'error: raw string delimiter longer than 16 characters'
const char *str = R"abcdefghijkmnopqrstuvwxyz(abcdef)abcdefghijkmnopqrstuvwxyz";
// RUN: %clang_cc1 -std=c++0x -E %s 2>&1 | grep 'error: raw string delimiter longer than 16 characters'
const char *str = R"abcdefghijkmnopqrstuvwxyz(abcdef)abcdefghijkmnopqrstuvwxyz";

View File

@ -0,0 +1,8 @@
// RUN: %clang_cc1 -std=c++0x -E %s 2>&1 | grep 'error: raw string missing terminating delimiter )foo"'
const char *str = R"foo(abc
def)bar";
// RUN: %clang_cc1 -std=c++0x -E %s 2>&1 | grep 'error: raw string missing terminating delimiter )foo"'
const char *str = R"foo(abc
def)bar";

View File

@ -7,4 +7,15 @@ void f() {
ustr = u"a UTF-16 string"; // expected-error {{assigning to 'char16_t *' from incompatible type 'const char16_t [16]'}}
char32_t *Ustr;
Ustr = U"a UTF-32 string"; // expected-error {{assigning to 'char32_t *' from incompatible type 'const char32_t [16]'}}
// Ordinary (unprefixed) raw string literal.
char *Rstr;
Rstr = R"foo(a raw string)foo"; // expected-warning{{conversion from string literal to 'char *' is deprecated}}
wchar_t *LRstr;
LRstr = LR"foo(a wide raw string)foo"; // expected-warning{{conversion from string literal to 'wchar_t *' is deprecated}}
char *u8Rstr;
u8Rstr = u8R"foo(a UTF-8 raw string)foo"; // expected-error {{assigning to 'char *' from incompatible type 'const char [19]'}}
char16_t *uRstr;
uRstr = uR"foo(a UTF-16 raw string)foo"; // expected-error {{assigning to 'char16_t *' from incompatible type 'const char16_t [20]'}}
char32_t *URstr;
URstr = UR"foo(a UTF-32 raw string)foo"; // expected-error {{assigning to 'char32_t *' from incompatible type 'const char32_t [20]'}}
}