forked from OSchip/llvm-project
Add support for C++0x raw string literals.
llvm-svn: 137298
This commit is contained in:
parent
dbd1352c80
commit
54edccafc5
|
@ -55,6 +55,15 @@ def err_unterminated___pragma : Error<"missing terminating ')' character">;
|
||||||
|
|
||||||
def err_conflict_marker : Error<"version control conflict marker in file">;
|
def err_conflict_marker : Error<"version control conflict marker in file">;
|
||||||
|
|
||||||
|
def err_raw_delim_too_long : Error<
|
||||||
|
"raw string delimiter longer than 16 characters"
|
||||||
|
"; use PREFIX( )PREFIX to delimit raw string">;
|
||||||
|
// Reported when a character that cannot be part of a raw string delimiter is
// found before the opening '('.  %0 is the offending character.
def err_invalid_char_raw_delim : Error<
  "invalid character '%0' in raw string delimiter"
  "; use PREFIX( )PREFIX to delimit raw string">;
|
||||||
|
def err_unterminated_raw_string : Error<
|
||||||
|
"raw string missing terminating delimiter )%0\"">;
|
||||||
|
|
||||||
def ext_multichar_character_literal : ExtWarn<
|
def ext_multichar_character_literal : ExtWarn<
|
||||||
"multi-character character constant">, InGroup<MultiChar>;
|
"multi-character character constant">, InGroup<MultiChar>;
|
||||||
def ext_four_char_character_literal : Extension<
|
def ext_four_char_character_literal : Extension<
|
||||||
|
|
|
@ -485,6 +485,8 @@ private:
|
||||||
void LexNumericConstant (Token &Result, const char *CurPtr);
|
void LexNumericConstant (Token &Result, const char *CurPtr);
|
||||||
void LexStringLiteral (Token &Result, const char *CurPtr,
|
void LexStringLiteral (Token &Result, const char *CurPtr,
|
||||||
tok::TokenKind Kind);
|
tok::TokenKind Kind);
|
||||||
|
void LexRawStringLiteral (Token &Result, const char *CurPtr,
|
||||||
|
tok::TokenKind Kind);
|
||||||
void LexAngledStringLiteral(Token &Result, const char *CurPtr);
|
void LexAngledStringLiteral(Token &Result, const char *CurPtr);
|
||||||
void LexCharConstant (Token &Result, const char *CurPtr,
|
void LexCharConstant (Token &Result, const char *CurPtr,
|
||||||
tok::TokenKind Kind);
|
tok::TokenKind Kind);
|
||||||
|
|
|
@ -197,6 +197,7 @@ public:
|
||||||
|
|
||||||
private:
|
private:
|
||||||
void init(const Token *StringToks, unsigned NumStringToks);
|
void init(const Token *StringToks, unsigned NumStringToks);
|
||||||
|
void CopyStringFragment(const StringRef &Fragment);
|
||||||
};
|
};
|
||||||
|
|
||||||
} // end namespace clang
|
} // end namespace clang
|
||||||
|
|
|
@ -33,6 +33,7 @@
|
||||||
#include "llvm/Support/Compiler.h"
|
#include "llvm/Support/Compiler.h"
|
||||||
#include "llvm/Support/MemoryBuffer.h"
|
#include "llvm/Support/MemoryBuffer.h"
|
||||||
#include <cctype>
|
#include <cctype>
|
||||||
|
#include <cstring>
|
||||||
using namespace clang;
|
using namespace clang;
|
||||||
|
|
||||||
static void InitCharacterInfo();
|
static void InitCharacterInfo();
|
||||||
|
@ -760,7 +761,8 @@ enum {
|
||||||
CHAR_LETTER = 0x04, // a-z,A-Z
|
CHAR_LETTER = 0x04, // a-z,A-Z
|
||||||
CHAR_NUMBER = 0x08, // 0-9
|
CHAR_NUMBER = 0x08, // 0-9
|
||||||
CHAR_UNDER = 0x10, // _
|
CHAR_UNDER = 0x10, // _
|
||||||
CHAR_PERIOD = 0x20 // .
|
CHAR_PERIOD = 0x20, // .
|
||||||
|
CHAR_RAWDEL = 0x40 // {}[]#<>%:;?*+-/^&|~!=,"'
|
||||||
};
|
};
|
||||||
|
|
||||||
// Statically initialize CharInfo table based on ASCII character set
|
// Statically initialize CharInfo table based on ASCII character set
|
||||||
|
@ -785,20 +787,20 @@ static const unsigned char CharInfo[256] =
|
||||||
0 , 0 , 0 , 0 ,
|
0 , 0 , 0 , 0 ,
|
||||||
//32 SP 33 ! 34 " 35 #
|
//32 SP 33 ! 34 " 35 #
|
||||||
//36 $ 37 % 38 & 39 '
|
//36 $ 37 % 38 & 39 '
|
||||||
CHAR_HORZ_WS, 0 , 0 , 0 ,
|
CHAR_HORZ_WS, CHAR_RAWDEL , CHAR_RAWDEL , CHAR_RAWDEL ,
|
||||||
0 , 0 , 0 , 0 ,
|
0 , CHAR_RAWDEL , CHAR_RAWDEL , CHAR_RAWDEL ,
|
||||||
//40 ( 41 ) 42 * 43 +
|
//40 ( 41 ) 42 * 43 +
|
||||||
//44 , 45 - 46 . 47 /
|
//44 , 45 - 46 . 47 /
|
||||||
0 , 0 , 0 , 0 ,
|
0 , 0 , CHAR_RAWDEL , CHAR_RAWDEL ,
|
||||||
0 , 0 , CHAR_PERIOD , 0 ,
|
CHAR_RAWDEL , CHAR_RAWDEL , CHAR_PERIOD , CHAR_RAWDEL ,
|
||||||
//48 0 49 1 50 2 51 3
|
//48 0 49 1 50 2 51 3
|
||||||
//52 4 53 5 54 6 55 7
|
//52 4 53 5 54 6 55 7
|
||||||
CHAR_NUMBER , CHAR_NUMBER , CHAR_NUMBER , CHAR_NUMBER ,
|
CHAR_NUMBER , CHAR_NUMBER , CHAR_NUMBER , CHAR_NUMBER ,
|
||||||
CHAR_NUMBER , CHAR_NUMBER , CHAR_NUMBER , CHAR_NUMBER ,
|
CHAR_NUMBER , CHAR_NUMBER , CHAR_NUMBER , CHAR_NUMBER ,
|
||||||
//56 8 57 9 58 : 59 ;
|
//56 8 57 9 58 : 59 ;
|
||||||
//60 < 61 = 62 > 63 ?
|
//60 < 61 = 62 > 63 ?
|
||||||
CHAR_NUMBER , CHAR_NUMBER , 0 , 0 ,
|
CHAR_NUMBER , CHAR_NUMBER , CHAR_RAWDEL , CHAR_RAWDEL ,
|
||||||
0 , 0 , 0 , 0 ,
|
CHAR_RAWDEL , CHAR_RAWDEL , CHAR_RAWDEL , CHAR_RAWDEL ,
|
||||||
//64 @ 65 A 66 B 67 C
|
//64 @ 65 A 66 B 67 C
|
||||||
//68 D 69 E 70 F 71 G
|
//68 D 69 E 70 F 71 G
|
||||||
0 , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER ,
|
0 , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER ,
|
||||||
|
@ -813,8 +815,8 @@ static const unsigned char CharInfo[256] =
|
||||||
CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER ,
|
CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER ,
|
||||||
//88 X 89 Y 90 Z 91 [
|
//88 X 89 Y 90 Z 91 [
|
||||||
//92 \ 93 ] 94 ^ 95 _
|
//92 \ 93 ] 94 ^ 95 _
|
||||||
CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , 0 ,
|
CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_RAWDEL ,
|
||||||
0 , 0 , 0 , CHAR_UNDER ,
|
0 , CHAR_RAWDEL , CHAR_RAWDEL , CHAR_UNDER ,
|
||||||
//96 ` 97 a 98 b 99 c
|
//96 ` 97 a 98 b 99 c
|
||||||
//100 d 101 e 102 f 103 g
|
//100 d 101 e 102 f 103 g
|
||||||
0 , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER ,
|
0 , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER ,
|
||||||
|
@ -829,8 +831,8 @@ static const unsigned char CharInfo[256] =
|
||||||
CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER ,
|
CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER ,
|
||||||
//120 x 121 y 122 z 123 {
|
//120 x 121 y 122 z 123 {
|
||||||
//124 | 125 } 126 ~ 127 DEL
|
//124 | 125 } 126 ~ 127 DEL
|
||||||
CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , 0 ,
|
CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_RAWDEL ,
|
||||||
0 , 0 , 0 , 0
|
CHAR_RAWDEL , CHAR_RAWDEL , CHAR_RAWDEL , 0
|
||||||
};
|
};
|
||||||
|
|
||||||
static void InitCharacterInfo() {
|
static void InitCharacterInfo() {
|
||||||
|
@ -888,6 +890,14 @@ static inline bool isNumberBody(unsigned char c) {
|
||||||
true : false;
|
true : false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// isRawStringDelimBody - Return true if this is the body character of a
|
||||||
|
/// raw string delimiter.
|
||||||
|
static inline bool isRawStringDelimBody(unsigned char c) {
|
||||||
|
return (CharInfo[c] &
|
||||||
|
(CHAR_LETTER|CHAR_NUMBER|CHAR_UNDER|CHAR_PERIOD|CHAR_RAWDEL)) ?
|
||||||
|
true : false;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
// Diagnostics forwarding code.
|
// Diagnostics forwarding code.
|
||||||
|
@ -1363,6 +1373,78 @@ void Lexer::LexStringLiteral(Token &Result, const char *CurPtr,
|
||||||
Result.setLiteralData(TokStart);
|
Result.setLiteralData(TokStart);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// LexRawStringLiteral - Lex the remainder of a raw string literal, after
|
||||||
|
/// having lexed R", LR", u8R", uR", or UR".
|
||||||
|
void Lexer::LexRawStringLiteral(Token &Result, const char *CurPtr,
|
||||||
|
tok::TokenKind Kind) {
|
||||||
|
// This function doesn't use getAndAdvanceChar because C++0x [lex.pptoken]p3:
|
||||||
|
// Between the initial and final double quote characters of the raw string,
|
||||||
|
// any transformations performed in phases 1 and 2 (trigraphs,
|
||||||
|
// universal-character-names, and line splicing) are reverted.
|
||||||
|
|
||||||
|
unsigned PrefixLen = 0;
|
||||||
|
|
||||||
|
while (PrefixLen != 16 && isRawStringDelimBody(CurPtr[PrefixLen]))
|
||||||
|
++PrefixLen;
|
||||||
|
|
||||||
|
// If the last character was not a '(', then we didn't lex a valid delimiter.
|
||||||
|
if (CurPtr[PrefixLen] != '(') {
|
||||||
|
if (!isLexingRawMode()) {
|
||||||
|
const char *PrefixEnd = &CurPtr[PrefixLen];
|
||||||
|
if (PrefixLen == 16) {
|
||||||
|
Diag(PrefixEnd, diag::err_raw_delim_too_long);
|
||||||
|
} else {
|
||||||
|
Diag(PrefixEnd, diag::err_invalid_char_raw_delim)
|
||||||
|
<< StringRef(PrefixEnd, 1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Search for the next '"' in hopes of salvaging the lexer. Unfortunately,
|
||||||
|
// it's possible the '"' was intended to be part of the raw string, but
|
||||||
|
// there's not much we can do about that.
|
||||||
|
while (1) {
|
||||||
|
char C = *CurPtr++;
|
||||||
|
|
||||||
|
if (C == '"')
|
||||||
|
break;
|
||||||
|
if (C == 0 && CurPtr-1 == BufferEnd) {
|
||||||
|
--CurPtr;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
FormTokenWithChars(Result, CurPtr, tok::unknown);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Save prefix and move CurPtr past it
|
||||||
|
const char *Prefix = CurPtr;
|
||||||
|
CurPtr += PrefixLen + 1; // skip over prefix and '('
|
||||||
|
|
||||||
|
while (1) {
|
||||||
|
char C = *CurPtr++;
|
||||||
|
|
||||||
|
if (C == ')') {
|
||||||
|
// Check for prefix match and closing quote.
|
||||||
|
if (strncmp(CurPtr, Prefix, PrefixLen) == 0 && CurPtr[PrefixLen] == '"') {
|
||||||
|
CurPtr += PrefixLen + 1; // skip over prefix and '"'
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
} else if (C == 0 && CurPtr-1 == BufferEnd) { // End of file.
|
||||||
|
if (!isLexingRawMode())
|
||||||
|
Diag(BufferPtr, diag::err_unterminated_raw_string)
|
||||||
|
<< StringRef(Prefix, PrefixLen);
|
||||||
|
FormTokenWithChars(Result, CurPtr-1, tok::unknown);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Update the location of token as well as BufferPtr.
|
||||||
|
const char *TokStart = BufferPtr;
|
||||||
|
FormTokenWithChars(Result, CurPtr, Kind);
|
||||||
|
Result.setLiteralData(TokStart);
|
||||||
|
}
|
||||||
|
|
||||||
/// LexAngledStringLiteral - Lex the remainder of an angled string literal,
|
/// LexAngledStringLiteral - Lex the remainder of an angled string literal,
|
||||||
/// after having lexed the '<' character. This is used for #include filenames.
|
/// after having lexed the '<' character. This is used for #include filenames.
|
||||||
void Lexer::LexAngledStringLiteral(Token &Result, const char *CurPtr) {
|
void Lexer::LexAngledStringLiteral(Token &Result, const char *CurPtr) {
|
||||||
|
@ -2262,12 +2344,36 @@ LexNextToken:
|
||||||
return LexCharConstant(Result, ConsumeChar(CurPtr, SizeTmp, Result),
|
return LexCharConstant(Result, ConsumeChar(CurPtr, SizeTmp, Result),
|
||||||
tok::utf16_char_constant);
|
tok::utf16_char_constant);
|
||||||
|
|
||||||
|
// UTF-16 raw string literal
|
||||||
|
if (Char == 'R' && getCharAndSize(CurPtr + SizeTmp, SizeTmp2) == '"')
|
||||||
|
return LexRawStringLiteral(Result,
|
||||||
|
ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
|
||||||
|
SizeTmp2, Result),
|
||||||
|
tok::utf16_string_literal);
|
||||||
|
|
||||||
|
if (Char == '8') {
|
||||||
|
char Char2 = getCharAndSize(CurPtr + SizeTmp, SizeTmp2);
|
||||||
|
|
||||||
// UTF-8 string literal
|
// UTF-8 string literal
|
||||||
if (Char == '8' && getCharAndSize(CurPtr + SizeTmp, SizeTmp2) == '"')
|
if (Char2 == '"')
|
||||||
return LexStringLiteral(Result,
|
return LexStringLiteral(Result,
|
||||||
ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
|
ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
|
||||||
SizeTmp2, Result),
|
SizeTmp2, Result),
|
||||||
tok::utf8_string_literal);
|
tok::utf8_string_literal);
|
||||||
|
|
||||||
|
if (Char2 == 'R') {
|
||||||
|
unsigned SizeTmp3;
|
||||||
|
char Char3 = getCharAndSize(CurPtr + SizeTmp + SizeTmp2, SizeTmp3);
|
||||||
|
// UTF-8 raw string literal
|
||||||
|
if (Char3 == '"') {
|
||||||
|
return LexRawStringLiteral(Result,
|
||||||
|
ConsumeChar(ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
|
||||||
|
SizeTmp2, Result),
|
||||||
|
SizeTmp3, Result),
|
||||||
|
tok::utf8_string_literal);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// treat u like the start of an identifier.
|
// treat u like the start of an identifier.
|
||||||
|
@ -2289,11 +2395,34 @@ LexNextToken:
|
||||||
if (Char == '\'')
|
if (Char == '\'')
|
||||||
return LexCharConstant(Result, ConsumeChar(CurPtr, SizeTmp, Result),
|
return LexCharConstant(Result, ConsumeChar(CurPtr, SizeTmp, Result),
|
||||||
tok::utf32_char_constant);
|
tok::utf32_char_constant);
|
||||||
|
|
||||||
|
// UTF-32 raw string literal
|
||||||
|
if (Char == 'R' && getCharAndSize(CurPtr + SizeTmp, SizeTmp2) == '"')
|
||||||
|
return LexRawStringLiteral(Result,
|
||||||
|
ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
|
||||||
|
SizeTmp2, Result),
|
||||||
|
tok::utf32_string_literal);
|
||||||
}
|
}
|
||||||
|
|
||||||
// treat U like the start of an identifier.
|
// treat U like the start of an identifier.
|
||||||
return LexIdentifier(Result, CurPtr);
|
return LexIdentifier(Result, CurPtr);
|
||||||
|
|
||||||
|
case 'R': // Identifier or C++0x raw string literal
|
||||||
|
// Notify MIOpt that we read a non-whitespace/non-comment token.
|
||||||
|
MIOpt.ReadToken();
|
||||||
|
|
||||||
|
if (Features.CPlusPlus0x) {
|
||||||
|
Char = getCharAndSize(CurPtr, SizeTmp);
|
||||||
|
|
||||||
|
if (Char == '"')
|
||||||
|
return LexRawStringLiteral(Result,
|
||||||
|
ConsumeChar(CurPtr, SizeTmp, Result),
|
||||||
|
tok::string_literal);
|
||||||
|
}
|
||||||
|
|
||||||
|
// treat R like the start of an identifier.
|
||||||
|
return LexIdentifier(Result, CurPtr);
|
||||||
|
|
||||||
case 'L': // Identifier (Loony) or wide literal (L'x' or L"xyz").
|
case 'L': // Identifier (Loony) or wide literal (L'x' or L"xyz").
|
||||||
// Notify MIOpt that we read a non-whitespace/non-comment token.
|
// Notify MIOpt that we read a non-whitespace/non-comment token.
|
||||||
MIOpt.ReadToken();
|
MIOpt.ReadToken();
|
||||||
|
@ -2304,6 +2433,14 @@ LexNextToken:
|
||||||
return LexStringLiteral(Result, ConsumeChar(CurPtr, SizeTmp, Result),
|
return LexStringLiteral(Result, ConsumeChar(CurPtr, SizeTmp, Result),
|
||||||
tok::wide_string_literal);
|
tok::wide_string_literal);
|
||||||
|
|
||||||
|
// Wide raw string literal.
|
||||||
|
if (Features.CPlusPlus0x && Char == 'R' &&
|
||||||
|
getCharAndSize(CurPtr + SizeTmp, SizeTmp2) == '"')
|
||||||
|
return LexRawStringLiteral(Result,
|
||||||
|
ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
|
||||||
|
SizeTmp2, Result),
|
||||||
|
tok::wide_string_literal);
|
||||||
|
|
||||||
// Wide character constant.
|
// Wide character constant.
|
||||||
if (Char == '\'')
|
if (Char == '\'')
|
||||||
return LexCharConstant(Result, ConsumeChar(CurPtr, SizeTmp, Result),
|
return LexCharConstant(Result, ConsumeChar(CurPtr, SizeTmp, Result),
|
||||||
|
@ -2313,7 +2450,7 @@ LexNextToken:
|
||||||
// C99 6.4.2: Identifiers.
|
// C99 6.4.2: Identifiers.
|
||||||
case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
|
case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
|
||||||
case 'H': case 'I': case 'J': case 'K': /*'L'*/case 'M': case 'N':
|
case 'H': case 'I': case 'J': case 'K': /*'L'*/case 'M': case 'N':
|
||||||
case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': /*'U'*/
|
case 'O': case 'P': case 'Q': /*'R'*/case 'S': case 'T': /*'U'*/
|
||||||
case 'V': case 'W': case 'X': case 'Y': case 'Z':
|
case 'V': case 'W': case 'X': case 'Y': case 'Z':
|
||||||
case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
|
case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
|
||||||
case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
|
case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
|
||||||
|
|
|
@ -713,6 +713,38 @@ NumericLiteralParser::GetFloatValue(llvm::APFloat &Result) {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/// character-literal: [C++0x lex.ccon]
|
||||||
|
/// ' c-char-sequence '
|
||||||
|
/// u' c-char-sequence '
|
||||||
|
/// U' c-char-sequence '
|
||||||
|
/// L' c-char-sequence '
|
||||||
|
/// c-char-sequence:
|
||||||
|
/// c-char
|
||||||
|
/// c-char-sequence c-char
|
||||||
|
/// c-char:
|
||||||
|
/// any member of the source character set except the single-quote ',
|
||||||
|
/// backslash \, or new-line character
|
||||||
|
/// escape-sequence
|
||||||
|
/// universal-character-name
|
||||||
|
/// escape-sequence: [C++0x lex.ccon]
|
||||||
|
/// simple-escape-sequence
|
||||||
|
/// octal-escape-sequence
|
||||||
|
/// hexadecimal-escape-sequence
|
||||||
|
/// simple-escape-sequence:
|
||||||
|
/// one of \' \" \? \\ \a \b \f \n \r \t \v
|
||||||
|
/// octal-escape-sequence:
|
||||||
|
/// \ octal-digit
|
||||||
|
/// \ octal-digit octal-digit
|
||||||
|
/// \ octal-digit octal-digit octal-digit
|
||||||
|
/// hexadecimal-escape-sequence:
|
||||||
|
/// \x hexadecimal-digit
|
||||||
|
/// hexadecimal-escape-sequence hexadecimal-digit
|
||||||
|
/// universal-character-name:
|
||||||
|
/// \u hex-quad
|
||||||
|
/// \U hex-quad hex-quad
|
||||||
|
/// hex-quad:
|
||||||
|
/// hex-digit hex-digit hex-digit hex-digit
|
||||||
|
///
|
||||||
CharLiteralParser::CharLiteralParser(const char *begin, const char *end,
|
CharLiteralParser::CharLiteralParser(const char *begin, const char *end,
|
||||||
SourceLocation Loc, Preprocessor &PP,
|
SourceLocation Loc, Preprocessor &PP,
|
||||||
tok::TokenKind kind) {
|
tok::TokenKind kind) {
|
||||||
|
@ -825,34 +857,52 @@ CharLiteralParser::CharLiteralParser(const char *begin, const char *end,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/// string-literal: [C99 6.4.5]
|
/// string-literal: [C++0x lex.string]
|
||||||
/// " [s-char-sequence] "
|
/// encoding-prefix " [s-char-sequence] "
|
||||||
/// L" [s-char-sequence] "
|
/// encoding-prefix R raw-string
|
||||||
|
/// encoding-prefix:
|
||||||
|
/// u8
|
||||||
|
/// u
|
||||||
|
/// U
|
||||||
|
/// L
|
||||||
/// s-char-sequence:
|
/// s-char-sequence:
|
||||||
/// s-char
|
/// s-char
|
||||||
/// s-char-sequence s-char
|
/// s-char-sequence s-char
|
||||||
/// s-char:
|
/// s-char:
|
||||||
/// any source character except the double quote ",
|
/// any member of the source character set except the double-quote ",
|
||||||
/// backslash \, or newline character
|
/// backslash \, or new-line character
|
||||||
/// escape-character
|
/// escape-sequence
|
||||||
/// universal-character-name
|
/// universal-character-name
|
||||||
/// escape-character: [C99 6.4.4.4]
|
/// raw-string:
|
||||||
/// \ escape-code
|
/// " d-char-sequence ( r-char-sequence ) d-char-sequence "
|
||||||
/// universal-character-name
|
/// r-char-sequence:
|
||||||
/// escape-code:
|
/// r-char
|
||||||
/// character-escape-code
|
/// r-char-sequence r-char
|
||||||
/// octal-escape-code
|
/// r-char:
|
||||||
/// hex-escape-code
|
/// any member of the source character set, except a right parenthesis )
|
||||||
/// character-escape-code: one of
|
/// followed by the initial d-char-sequence (which may be empty)
|
||||||
/// n t b r f v a
|
/// followed by a double quote ".
|
||||||
/// \ ' " ?
|
/// d-char-sequence:
|
||||||
/// octal-escape-code:
|
/// d-char
|
||||||
/// octal-digit
|
/// d-char-sequence d-char
|
||||||
/// octal-digit octal-digit
|
/// d-char:
|
||||||
/// octal-digit octal-digit octal-digit
|
/// any member of the basic source character set except:
|
||||||
/// hex-escape-code:
|
/// space, the left parenthesis (, the right parenthesis ),
|
||||||
/// x hex-digit
|
/// the backslash \, and the control characters representing horizontal
|
||||||
/// hex-escape-code hex-digit
|
/// tab, vertical tab, form feed, and newline.
|
||||||
|
/// escape-sequence: [C++0x lex.ccon]
|
||||||
|
/// simple-escape-sequence
|
||||||
|
/// octal-escape-sequence
|
||||||
|
/// hexadecimal-escape-sequence
|
||||||
|
/// simple-escape-sequence:
|
||||||
|
/// one of \' \" \? \\ \a \b \f \n \r \t \v
|
||||||
|
/// octal-escape-sequence:
|
||||||
|
/// \ octal-digit
|
||||||
|
/// \ octal-digit octal-digit
|
||||||
|
/// \ octal-digit octal-digit octal-digit
|
||||||
|
/// hexadecimal-escape-sequence:
|
||||||
|
/// \x hexadecimal-digit
|
||||||
|
/// hexadecimal-escape-sequence hexadecimal-digit
|
||||||
/// universal-character-name:
|
/// universal-character-name:
|
||||||
/// \u hex-quad
|
/// \u hex-quad
|
||||||
/// \U hex-quad hex-quad
|
/// \U hex-quad hex-quad
|
||||||
|
@ -972,8 +1022,24 @@ void StringLiteralParser::init(const Token *StringToks, unsigned NumStringToks){
|
||||||
++ThisTokBuf;
|
++ThisTokBuf;
|
||||||
}
|
}
|
||||||
|
|
||||||
assert(ThisTokBuf[0] == '"' && "Expected quote, lexer broken?");
|
// Check for raw string
|
||||||
|
if (ThisTokBuf[0] == 'R') {
|
||||||
|
ThisTokBuf += 2; // skip R"
|
||||||
|
|
||||||
|
const char *Prefix = ThisTokBuf;
|
||||||
|
while (ThisTokBuf[0] != '(')
|
||||||
++ThisTokBuf;
|
++ThisTokBuf;
|
||||||
|
++ThisTokBuf; // skip '('
|
||||||
|
|
||||||
|
// remove same number of characters from the end
|
||||||
|
if (ThisTokEnd >= ThisTokBuf + (ThisTokBuf - Prefix))
|
||||||
|
ThisTokEnd -= (ThisTokBuf - Prefix);
|
||||||
|
|
||||||
|
// Copy the string over
|
||||||
|
CopyStringFragment(StringRef(ThisTokBuf, ThisTokEnd - ThisTokBuf));
|
||||||
|
} else {
|
||||||
|
assert(ThisTokBuf[0] == '"' && "Expected quote, lexer broken?");
|
||||||
|
++ThisTokBuf; // skip "
|
||||||
|
|
||||||
// Check if this is a pascal string
|
// Check if this is a pascal string
|
||||||
if (Features.PascalStrings && ThisTokBuf + 1 != ThisTokEnd &&
|
if (Features.PascalStrings && ThisTokBuf + 1 != ThisTokEnd &&
|
||||||
|
@ -997,19 +1063,7 @@ void StringLiteralParser::init(const Token *StringToks, unsigned NumStringToks){
|
||||||
} while (ThisTokBuf != ThisTokEnd && ThisTokBuf[0] != '\\');
|
} while (ThisTokBuf != ThisTokEnd && ThisTokBuf[0] != '\\');
|
||||||
|
|
||||||
// Copy the character span over.
|
// Copy the character span over.
|
||||||
unsigned Len = ThisTokBuf-InStart;
|
CopyStringFragment(StringRef(InStart, ThisTokBuf - InStart));
|
||||||
if (CharByteWidth == 1) {
|
|
||||||
memcpy(ResultPtr, InStart, Len);
|
|
||||||
ResultPtr += Len;
|
|
||||||
} else {
|
|
||||||
// Note: our internal rep of wide char tokens is always little-endian.
|
|
||||||
for (; Len; --Len, ++InStart) {
|
|
||||||
*ResultPtr++ = InStart[0];
|
|
||||||
// Add zeros at the end.
|
|
||||||
for (unsigned i = 1, e = CharByteWidth; i != e; ++i)
|
|
||||||
*ResultPtr++ = 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
// Is this a Universal Character Name escape?
|
// Is this a Universal Character Name escape?
|
||||||
|
@ -1032,6 +1086,7 @@ void StringLiteralParser::init(const Token *StringToks, unsigned NumStringToks){
|
||||||
*ResultPtr++ = ResultChar >> i*8;
|
*ResultPtr++ = ResultChar >> i*8;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (Pascal) {
|
if (Pascal) {
|
||||||
ResultBuf[0] = ResultPtr-&ResultBuf[0]-1;
|
ResultBuf[0] = ResultPtr-&ResultBuf[0]-1;
|
||||||
|
@ -1062,6 +1117,25 @@ void StringLiteralParser::init(const Token *StringToks, unsigned NumStringToks){
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/// copyStringFragment - This function copies from Start to End into ResultPtr.
|
||||||
|
/// Performs widening for multi-byte characters.
|
||||||
|
void StringLiteralParser::CopyStringFragment(const StringRef &Fragment) {
|
||||||
|
// Copy the character span over.
|
||||||
|
if (CharByteWidth == 1) {
|
||||||
|
memcpy(ResultPtr, Fragment.data(), Fragment.size());
|
||||||
|
ResultPtr += Fragment.size();
|
||||||
|
} else {
|
||||||
|
// Note: our internal rep of wide char tokens is always little-endian.
|
||||||
|
for (StringRef::iterator I=Fragment.begin(), E=Fragment.end(); I!=E; ++I) {
|
||||||
|
*ResultPtr++ = *I;
|
||||||
|
// Add zeros at the end.
|
||||||
|
for (unsigned i = 1, e = CharByteWidth; i != e; ++i)
|
||||||
|
*ResultPtr++ = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/// getOffsetOfStringByte - This function returns the offset of the
|
/// getOffsetOfStringByte - This function returns the offset of the
|
||||||
/// specified byte of the string data represented by Token. This handles
|
/// specified byte of the string data represented by Token. This handles
|
||||||
/// advancing over escape sequences in the string.
|
/// advancing over escape sequences in the string.
|
||||||
|
|
|
@ -17,39 +17,53 @@
|
||||||
using namespace clang;
|
using namespace clang;
|
||||||
|
|
||||||
|
|
||||||
|
/// IsStringPrefix - Return true if Str is a string prefix.
|
||||||
|
/// 'L', 'u', 'U', or 'u8'. Including raw versions.
|
||||||
|
static bool IsStringPrefix(const StringRef &Str, bool CPlusPlus0x) {
|
||||||
|
|
||||||
|
if (Str[0] == 'L' ||
|
||||||
|
(CPlusPlus0x && (Str[0] == 'u' || Str[0] == 'U' || Str[0] == 'R'))) {
|
||||||
|
|
||||||
|
if (Str.size() == 1)
|
||||||
|
return true; // "L", "u", "U", and "R"
|
||||||
|
|
||||||
|
// Check for raw flavors. Need to make sure the first character wasn't
|
||||||
|
// already R. Need CPlusPlus0x check for "LR".
|
||||||
|
if (Str[1] == 'R' && Str[0] != 'R' && Str.size() == 2 && CPlusPlus0x)
|
||||||
|
return true; // "LR", "uR", "UR"
|
||||||
|
|
||||||
|
// Check for "u8" and "u8R"
|
||||||
|
if (Str[0] == 'u' && Str[1] == '8') {
|
||||||
|
if (Str.size() == 2) return true; // "u8"
|
||||||
|
if (Str.size() == 3 && Str[2] == 'R') return true; // "u8R"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
/// IsIdentifierStringPrefix - Return true if the spelling of the token
|
/// IsIdentifierStringPrefix - Return true if the spelling of the token
|
||||||
/// is literally 'L', 'u', 'U', or 'u8'.
|
/// is literally 'L', 'u', 'U', or 'u8'. Including raw versions.
|
||||||
bool TokenConcatenation::IsIdentifierStringPrefix(const Token &Tok) const {
|
bool TokenConcatenation::IsIdentifierStringPrefix(const Token &Tok) const {
|
||||||
const LangOptions &LangOpts = PP.getLangOptions();
|
const LangOptions &LangOpts = PP.getLangOptions();
|
||||||
|
|
||||||
if (!Tok.needsCleaning()) {
|
if (!Tok.needsCleaning()) {
|
||||||
if (Tok.getLength() != 1 && Tok.getLength() != 2)
|
if (Tok.getLength() < 1 || Tok.getLength() > 3)
|
||||||
return false;
|
return false;
|
||||||
SourceManager &SM = PP.getSourceManager();
|
SourceManager &SM = PP.getSourceManager();
|
||||||
const char *Ptr = SM.getCharacterData(SM.getSpellingLoc(Tok.getLocation()));
|
const char *Ptr = SM.getCharacterData(SM.getSpellingLoc(Tok.getLocation()));
|
||||||
if (Tok.getLength() == 1)
|
return IsStringPrefix(StringRef(Ptr, Tok.getLength()),
|
||||||
return Ptr[0] == 'L' ||
|
LangOpts.CPlusPlus0x);
|
||||||
(LangOpts.CPlusPlus0x && (Ptr[0] == 'u' || Ptr[0] == 'U'));
|
|
||||||
if (Tok.getLength() == 2)
|
|
||||||
return LangOpts.CPlusPlus0x && Ptr[0] == 'u' && Ptr[1] == '8';
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (Tok.getLength() < 256) {
|
if (Tok.getLength() < 256) {
|
||||||
char Buffer[256];
|
char Buffer[256];
|
||||||
const char *TokPtr = Buffer;
|
const char *TokPtr = Buffer;
|
||||||
unsigned length = PP.getSpelling(Tok, TokPtr);
|
unsigned length = PP.getSpelling(Tok, TokPtr);
|
||||||
if (length == 1)
|
return IsStringPrefix(StringRef(TokPtr, length), LangOpts.CPlusPlus0x);
|
||||||
return TokPtr[0] == 'L' ||
|
|
||||||
(LangOpts.CPlusPlus0x && (TokPtr[0] == 'u' || TokPtr[0] == 'U'));
|
|
||||||
if (length == 2)
|
|
||||||
return LangOpts.CPlusPlus0x && TokPtr[0] == 'u' && TokPtr[1] == '8';
|
|
||||||
return false;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string TokStr = PP.getSpelling(Tok);
|
return IsStringPrefix(StringRef(PP.getSpelling(Tok)), LangOpts.CPlusPlus0x);
|
||||||
return TokStr == "L" || (LangOpts.CPlusPlus0x && (TokStr == "u8" ||
|
|
||||||
TokStr == "u" ||
|
|
||||||
TokStr == "U"));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
TokenConcatenation::TokenConcatenation(Preprocessor &pp) : PP(pp) {
|
TokenConcatenation::TokenConcatenation(Preprocessor &pp) : PP(pp) {
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
// RUN: %clang_cc1 -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck -check-prefix=C %s
|
// RUN: %clang_cc1 -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck -check-prefix=C %s
|
||||||
// RUN: %clang_cc1 -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck -check-prefix=C %s
|
// RUN: %clang_cc1 -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck -check-prefix=C %s
|
||||||
// RUN: %clang_cc1 -x c++ -std=c++0x -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck -check-prefix=C %s
|
// RUN: %clang_cc1 -x c++ -std=c++0x -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck -check-prefix=CPP0X %s
|
||||||
|
|
||||||
#include <stddef.h>
|
#include <stddef.h>
|
||||||
|
|
||||||
|
@ -38,5 +38,28 @@ int main() {
|
||||||
|
|
||||||
// CHECK-CPP0X: private unnamed_addr constant [4 x i8] c"def\00", align 1
|
// CHECK-CPP0X: private unnamed_addr constant [4 x i8] c"def\00", align 1
|
||||||
const char *g = u8"def";
|
const char *g = u8"def";
|
||||||
|
|
||||||
|
// CHECK-CPP0X: private unnamed_addr constant [4 x i8] c"ghi\00", align 1
|
||||||
|
const char *h = R"foo(ghi)foo";
|
||||||
|
|
||||||
|
// CHECK-CPP0X: private unnamed_addr constant [4 x i8] c"jkl\00", align 1
|
||||||
|
const char *i = u8R"bar(jkl)bar";
|
||||||
|
|
||||||
|
// CHECK-CPP0X: private unnamed_addr constant [6 x i8] c"G\00H\00\00\00", align 2
|
||||||
|
const char16_t *j = uR"foo(GH)foo";
|
||||||
|
|
||||||
|
// CHECK-CPP0X: private unnamed_addr constant [12 x i8] c"I\00\00\00J\00\00\00\00\00\00\00", align 4
|
||||||
|
const char32_t *k = UR"bar(IJ)bar";
|
||||||
|
|
||||||
|
// CHECK-CPP0X: private unnamed_addr constant [12 x i8] c"K\00\00\00L\00\00\00\00\00\00\00", align 4
|
||||||
|
const wchar_t *l = LR"bar(KL)bar";
|
||||||
|
|
||||||
|
// CHECK-CPP0X: private unnamed_addr constant [9 x i8] c"abc\5Cndef\00", align 1
|
||||||
|
const char *m = R"(abc\ndef)";
|
||||||
|
|
||||||
|
// CHECK-CPP0X: private unnamed_addr constant [8 x i8] c"abc\0Adef\00", align 1
|
||||||
|
const char *n = R"(abc
|
||||||
|
def)";
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,6 @@
|
||||||
|
// RUN: %clang_cc1 -std=c++0x -E %s 2>&1 | grep 'error: raw string delimiter longer than 16 characters'
|
||||||
|
|
||||||
|
const char *str = R"abcdefghijkmnopqrstuvwxyz(abcdef)abcdefghijkmnopqrstuvwxyz";
|
||||||
|
// RUN: %clang_cc1 -std=c++0x -E %s 2>&1 | grep 'error: raw string delimiter longer than 16 characters'
|
||||||
|
|
||||||
|
const char *str = R"abcdefghijkmnopqrstuvwxyz(abcdef)abcdefghijkmnopqrstuvwxyz";
|
|
@ -0,0 +1,8 @@
|
||||||
|
// RUN: %clang_cc1 -std=c++0x -E %s 2>&1 | grep 'error: raw string missing terminating delimiter )foo"'
|
||||||
|
|
||||||
|
const char *str = R"foo(abc
|
||||||
|
def)bar";
|
||||||
|
// RUN: %clang_cc1 -std=c++0x -E %s 2>&1 | grep 'error: raw string missing terminating delimiter )foo"'
|
||||||
|
|
||||||
|
const char *str = R"foo(abc
|
||||||
|
def)bar";
|
|
@ -7,4 +7,15 @@ void f() {
|
||||||
ustr = u"a UTF-16 string"; // expected-error {{assigning to 'char16_t *' from incompatible type 'const char16_t [16]'}}
|
ustr = u"a UTF-16 string"; // expected-error {{assigning to 'char16_t *' from incompatible type 'const char16_t [16]'}}
|
||||||
char32_t *Ustr;
|
char32_t *Ustr;
|
||||||
Ustr = U"a UTF-32 string"; // expected-error {{assigning to 'char32_t *' from incompatible type 'const char32_t [16]'}}
|
Ustr = U"a UTF-32 string"; // expected-error {{assigning to 'char32_t *' from incompatible type 'const char32_t [16]'}}
|
||||||
|
|
||||||
|
char *Rstr;
|
||||||
|
Rstr = "a raw string"; // expected-warning{{conversion from string literal to 'char *' is deprecated}}
|
||||||
|
wchar_t *LRstr;
|
||||||
|
LRstr = LR"foo(a wide raw string)foo"; // expected-warning{{conversion from string literal to 'wchar_t *' is deprecated}}
|
||||||
|
char *u8Rstr;
|
||||||
|
u8Rstr = u8R"foo(a UTF-8 raw string)foo"; // expected-error {{assigning to 'char *' from incompatible type 'const char [19]'}}
|
||||||
|
char16_t *uRstr;
|
||||||
|
uRstr = uR"foo(a UTF-16 raw string)foo"; // expected-error {{assigning to 'char16_t *' from incompatible type 'const char16_t [20]'}}
|
||||||
|
char32_t *URstr;
|
||||||
|
URstr = UR"foo(a UTF-32 raw string)foo"; // expected-error {{assigning to 'char32_t *' from incompatible type 'const char32_t [20]'}}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue