forked from OSchip/llvm-project
PR18855: Add support for UCNs and UTF-8 encoding within ud-suffixes.
llvm-svn: 201532
This commit is contained in:
parent
6287371ce6
commit
8b7258bdb3
|
@ -614,8 +614,28 @@ private:
|
||||||
/// \return The Unicode codepoint specified by the UCN, or 0 if the UCN is
|
/// \return The Unicode codepoint specified by the UCN, or 0 if the UCN is
|
||||||
/// invalid.
|
/// invalid.
|
||||||
uint32_t tryReadUCN(const char *&CurPtr, const char *SlashLoc, Token *Tok);
|
uint32_t tryReadUCN(const char *&CurPtr, const char *SlashLoc, Token *Tok);
|
||||||
};
|
|
||||||
|
|
||||||
|
/// \brief Try to consume a UCN as part of an identifier at the current
|
||||||
|
/// location.
|
||||||
|
/// \param CurPtr Initially points to the range of characters in the source
|
||||||
|
/// buffer containing the '\'. Updated to point past the end of
|
||||||
|
/// the UCN on success.
|
||||||
|
/// \param Size The number of characters occupied by the '\' (including
|
||||||
|
/// trigraphs and escaped newlines).
|
||||||
|
/// \param Result The token being produced. Marked as containing a UCN on
|
||||||
|
/// success.
|
||||||
|
/// \return \c true if a UCN was lexed and it produced an acceptable
|
||||||
|
/// identifier character, \c false otherwise.
|
||||||
|
bool tryConsumeIdentifierUCN(const char *&CurPtr, unsigned Size,
|
||||||
|
Token &Result);
|
||||||
|
|
||||||
|
/// \brief Try to consume an identifier character encoded in UTF-8.
|
||||||
|
/// \param CurPtr Points to the start of the (potential) UTF-8 code unit
|
||||||
|
/// sequence. On success, updated to point past the end of it.
|
||||||
|
/// \return \c true if a UTF-8 sequence mapping to an acceptable identifier
|
||||||
|
/// character was lexed, \c false otherwise.
|
||||||
|
bool tryConsumeIdentifierUTF8Char(const char *&CurPtr);
|
||||||
|
};
|
||||||
|
|
||||||
} // end namespace clang
|
} // end namespace clang
|
||||||
|
|
||||||
|
|
|
@ -33,6 +33,9 @@ class TargetInfo;
|
||||||
class SourceManager;
|
class SourceManager;
|
||||||
class LangOptions;
|
class LangOptions;
|
||||||
|
|
||||||
|
/// Copy characters from Input to Buf, expanding any UCNs.
|
||||||
|
void expandUCNs(SmallVectorImpl<char> &Buf, StringRef Input);
|
||||||
|
|
||||||
/// NumericLiteralParser - This performs strict semantic analysis of the content
|
/// NumericLiteralParser - This performs strict semantic analysis of the content
|
||||||
/// of a ppnumber, classifying it as either integer, floating, or erroneous,
|
/// of a ppnumber, classifying it as either integer, floating, or erroneous,
|
||||||
/// determines the radix of the value and can convert it to a useful value.
|
/// determines the radix of the value and can convert it to a useful value.
|
||||||
|
@ -48,6 +51,8 @@ class NumericLiteralParser {
|
||||||
|
|
||||||
bool saw_exponent, saw_period, saw_ud_suffix;
|
bool saw_exponent, saw_period, saw_ud_suffix;
|
||||||
|
|
||||||
|
SmallString<32> UDSuffixBuf;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
NumericLiteralParser(StringRef TokSpelling,
|
NumericLiteralParser(StringRef TokSpelling,
|
||||||
SourceLocation TokLoc,
|
SourceLocation TokLoc,
|
||||||
|
@ -72,7 +77,7 @@ public:
|
||||||
}
|
}
|
||||||
StringRef getUDSuffix() const {
|
StringRef getUDSuffix() const {
|
||||||
assert(saw_ud_suffix);
|
assert(saw_ud_suffix);
|
||||||
return StringRef(SuffixBegin, ThisTokEnd - SuffixBegin);
|
return UDSuffixBuf;
|
||||||
}
|
}
|
||||||
unsigned getUDSuffixOffset() const {
|
unsigned getUDSuffixOffset() const {
|
||||||
assert(saw_ud_suffix);
|
assert(saw_ud_suffix);
|
||||||
|
|
|
@ -1445,7 +1445,50 @@ static void maybeDiagnoseIDCharCompat(DiagnosticsEngine &Diags, uint32_t C,
|
||||||
<< Range;
|
<< Range;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool Lexer::tryConsumeIdentifierUCN(const char *&CurPtr, unsigned Size,
|
||||||
|
Token &Result) {
|
||||||
|
const char *UCNPtr = CurPtr + Size;
|
||||||
|
uint32_t CodePoint = tryReadUCN(UCNPtr, CurPtr, /*Token=*/0);
|
||||||
|
if (CodePoint == 0 || !isAllowedIDChar(CodePoint, LangOpts))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
if (!isLexingRawMode())
|
||||||
|
maybeDiagnoseIDCharCompat(PP->getDiagnostics(), CodePoint,
|
||||||
|
makeCharRange(*this, CurPtr, UCNPtr),
|
||||||
|
/*IsFirst=*/false);
|
||||||
|
|
||||||
|
Result.setFlag(Token::HasUCN);
|
||||||
|
if ((UCNPtr - CurPtr == 6 && CurPtr[1] == 'u') ||
|
||||||
|
(UCNPtr - CurPtr == 10 && CurPtr[1] == 'U'))
|
||||||
|
CurPtr = UCNPtr;
|
||||||
|
else
|
||||||
|
while (CurPtr != UCNPtr)
|
||||||
|
(void)getAndAdvanceChar(CurPtr, Result);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool Lexer::tryConsumeIdentifierUTF8Char(const char *&CurPtr) {
|
||||||
|
const char *UnicodePtr = CurPtr;
|
||||||
|
UTF32 CodePoint;
|
||||||
|
ConversionResult Result =
|
||||||
|
llvm::convertUTF8Sequence((const UTF8 **)&UnicodePtr,
|
||||||
|
(const UTF8 *)BufferEnd,
|
||||||
|
&CodePoint,
|
||||||
|
strictConversion);
|
||||||
|
if (Result != conversionOK ||
|
||||||
|
!isAllowedIDChar(static_cast<uint32_t>(CodePoint), LangOpts))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
if (!isLexingRawMode())
|
||||||
|
maybeDiagnoseIDCharCompat(PP->getDiagnostics(), CodePoint,
|
||||||
|
makeCharRange(*this, CurPtr, UnicodePtr),
|
||||||
|
/*IsFirst=*/false);
|
||||||
|
|
||||||
|
CurPtr = UnicodePtr;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
bool Lexer::LexIdentifier(Token &Result, const char *CurPtr) {
|
bool Lexer::LexIdentifier(Token &Result, const char *CurPtr) {
|
||||||
// Match [_A-Za-z0-9]*, we have already matched [_A-Za-z$]
|
// Match [_A-Za-z0-9]*, we have already matched [_A-Za-z$]
|
||||||
|
@ -1500,47 +1543,10 @@ FinishIdentifier:
|
||||||
C = getCharAndSize(CurPtr, Size);
|
C = getCharAndSize(CurPtr, Size);
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
} else if (C == '\\') {
|
} else if (C == '\\' && tryConsumeIdentifierUCN(CurPtr, Size, Result)) {
|
||||||
const char *UCNPtr = CurPtr + Size;
|
|
||||||
uint32_t CodePoint = tryReadUCN(UCNPtr, CurPtr, /*Token=*/0);
|
|
||||||
if (CodePoint == 0 || !isAllowedIDChar(CodePoint, LangOpts))
|
|
||||||
goto FinishIdentifier;
|
|
||||||
|
|
||||||
if (!isLexingRawMode()) {
|
|
||||||
maybeDiagnoseIDCharCompat(PP->getDiagnostics(), CodePoint,
|
|
||||||
makeCharRange(*this, CurPtr, UCNPtr),
|
|
||||||
/*IsFirst=*/false);
|
|
||||||
}
|
|
||||||
|
|
||||||
Result.setFlag(Token::HasUCN);
|
|
||||||
if ((UCNPtr - CurPtr == 6 && CurPtr[1] == 'u') ||
|
|
||||||
(UCNPtr - CurPtr == 10 && CurPtr[1] == 'U'))
|
|
||||||
CurPtr = UCNPtr;
|
|
||||||
else
|
|
||||||
while (CurPtr != UCNPtr)
|
|
||||||
(void)getAndAdvanceChar(CurPtr, Result);
|
|
||||||
|
|
||||||
C = getCharAndSize(CurPtr, Size);
|
C = getCharAndSize(CurPtr, Size);
|
||||||
continue;
|
continue;
|
||||||
} else if (!isASCII(C)) {
|
} else if (!isASCII(C) && tryConsumeIdentifierUTF8Char(CurPtr)) {
|
||||||
const char *UnicodePtr = CurPtr;
|
|
||||||
UTF32 CodePoint;
|
|
||||||
ConversionResult Result =
|
|
||||||
llvm::convertUTF8Sequence((const UTF8 **)&UnicodePtr,
|
|
||||||
(const UTF8 *)BufferEnd,
|
|
||||||
&CodePoint,
|
|
||||||
strictConversion);
|
|
||||||
if (Result != conversionOK ||
|
|
||||||
!isAllowedIDChar(static_cast<uint32_t>(CodePoint), LangOpts))
|
|
||||||
goto FinishIdentifier;
|
|
||||||
|
|
||||||
if (!isLexingRawMode()) {
|
|
||||||
maybeDiagnoseIDCharCompat(PP->getDiagnostics(), CodePoint,
|
|
||||||
makeCharRange(*this, CurPtr, UnicodePtr),
|
|
||||||
/*IsFirst=*/false);
|
|
||||||
}
|
|
||||||
|
|
||||||
CurPtr = UnicodePtr;
|
|
||||||
C = getCharAndSize(CurPtr, Size);
|
C = getCharAndSize(CurPtr, Size);
|
||||||
continue;
|
continue;
|
||||||
} else if (!isIdentifierBody(C)) {
|
} else if (!isIdentifierBody(C)) {
|
||||||
|
@ -1576,7 +1582,7 @@ bool Lexer::LexNumericConstant(Token &Result, const char *CurPtr) {
|
||||||
unsigned Size;
|
unsigned Size;
|
||||||
char C = getCharAndSize(CurPtr, Size);
|
char C = getCharAndSize(CurPtr, Size);
|
||||||
char PrevCh = 0;
|
char PrevCh = 0;
|
||||||
while (isPreprocessingNumberBody(C)) { // FIXME: UCNs in ud-suffix.
|
while (isPreprocessingNumberBody(C)) {
|
||||||
CurPtr = ConsumeChar(CurPtr, Size, Result);
|
CurPtr = ConsumeChar(CurPtr, Size, Result);
|
||||||
PrevCh = C;
|
PrevCh = C;
|
||||||
C = getCharAndSize(CurPtr, Size);
|
C = getCharAndSize(CurPtr, Size);
|
||||||
|
@ -1618,6 +1624,12 @@ bool Lexer::LexNumericConstant(Token &Result, const char *CurPtr) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// If we have a UCN or UTF-8 character (perhaps in a ud-suffix), continue.
|
||||||
|
if (C == '\\' && tryConsumeIdentifierUCN(CurPtr, Size, Result))
|
||||||
|
return LexNumericConstant(Result, CurPtr);
|
||||||
|
if (!isASCII(C) && tryConsumeIdentifierUTF8Char(CurPtr))
|
||||||
|
return LexNumericConstant(Result, CurPtr);
|
||||||
|
|
||||||
// Update the location of token as well as BufferPtr.
|
// Update the location of token as well as BufferPtr.
|
||||||
const char *TokStart = BufferPtr;
|
const char *TokStart = BufferPtr;
|
||||||
FormTokenWithChars(Result, CurPtr, tok::numeric_constant);
|
FormTokenWithChars(Result, CurPtr, tok::numeric_constant);
|
||||||
|
@ -1631,23 +1643,35 @@ const char *Lexer::LexUDSuffix(Token &Result, const char *CurPtr,
|
||||||
bool IsStringLiteral) {
|
bool IsStringLiteral) {
|
||||||
assert(getLangOpts().CPlusPlus);
|
assert(getLangOpts().CPlusPlus);
|
||||||
|
|
||||||
// Maximally munch an identifier. FIXME: UCNs.
|
// Maximally munch an identifier.
|
||||||
unsigned Size;
|
unsigned Size;
|
||||||
char C = getCharAndSize(CurPtr, Size);
|
char C = getCharAndSize(CurPtr, Size);
|
||||||
if (isIdentifierHead(C)) {
|
bool Consumed = false;
|
||||||
if (!getLangOpts().CPlusPlus11) {
|
|
||||||
if (!isLexingRawMode())
|
|
||||||
Diag(CurPtr,
|
|
||||||
C == '_' ? diag::warn_cxx11_compat_user_defined_literal
|
|
||||||
: diag::warn_cxx11_compat_reserved_user_defined_literal)
|
|
||||||
<< FixItHint::CreateInsertion(getSourceLocation(CurPtr), " ");
|
|
||||||
return CurPtr;
|
|
||||||
}
|
|
||||||
|
|
||||||
// C++11 [lex.ext]p10, [usrlit.suffix]p1: A program containing a ud-suffix
|
if (!isIdentifierHead(C)) {
|
||||||
// that does not start with an underscore is ill-formed. As a conforming
|
if (C == '\\' && tryConsumeIdentifierUCN(CurPtr, Size, Result))
|
||||||
// extension, we treat all such suffixes as if they had whitespace before
|
Consumed = true;
|
||||||
// them.
|
else if (!isASCII(C) && tryConsumeIdentifierUTF8Char(CurPtr))
|
||||||
|
Consumed = true;
|
||||||
|
else
|
||||||
|
return CurPtr;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!getLangOpts().CPlusPlus11) {
|
||||||
|
if (!isLexingRawMode())
|
||||||
|
Diag(CurPtr,
|
||||||
|
C == '_' ? diag::warn_cxx11_compat_user_defined_literal
|
||||||
|
: diag::warn_cxx11_compat_reserved_user_defined_literal)
|
||||||
|
<< FixItHint::CreateInsertion(getSourceLocation(CurPtr), " ");
|
||||||
|
return CurPtr;
|
||||||
|
}
|
||||||
|
|
||||||
|
// C++11 [lex.ext]p10, [usrlit.suffix]p1: A program containing a ud-suffix
|
||||||
|
// that does not start with an underscore is ill-formed. As a conforming
|
||||||
|
// extension, we treat all such suffixes as if they had whitespace before
|
||||||
|
// them. We assume a suffix beginning with a UCN or UTF-8 character is more
|
||||||
|
// likely to be a ud-suffix than a macro, however, and accept that.
|
||||||
|
if (!Consumed) {
|
||||||
bool IsUDSuffix = false;
|
bool IsUDSuffix = false;
|
||||||
if (C == '_')
|
if (C == '_')
|
||||||
IsUDSuffix = true;
|
IsUDSuffix = true;
|
||||||
|
@ -1685,16 +1709,22 @@ const char *Lexer::LexUDSuffix(Token &Result, const char *CurPtr,
|
||||||
Diag(CurPtr, getLangOpts().MSVCCompat
|
Diag(CurPtr, getLangOpts().MSVCCompat
|
||||||
? diag::ext_ms_reserved_user_defined_literal
|
? diag::ext_ms_reserved_user_defined_literal
|
||||||
: diag::ext_reserved_user_defined_literal)
|
: diag::ext_reserved_user_defined_literal)
|
||||||
<< FixItHint::CreateInsertion(getSourceLocation(CurPtr), " ");
|
<< FixItHint::CreateInsertion(getSourceLocation(CurPtr), " ");
|
||||||
return CurPtr;
|
return CurPtr;
|
||||||
}
|
}
|
||||||
|
|
||||||
Result.setFlag(Token::HasUDSuffix);
|
CurPtr = ConsumeChar(CurPtr, Size, Result);
|
||||||
do {
|
|
||||||
CurPtr = ConsumeChar(CurPtr, Size, Result);
|
|
||||||
C = getCharAndSize(CurPtr, Size);
|
|
||||||
} while (isIdentifierBody(C));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Result.setFlag(Token::HasUDSuffix);
|
||||||
|
while (true) {
|
||||||
|
C = getCharAndSize(CurPtr, Size);
|
||||||
|
if (isIdentifierBody(C)) { CurPtr = ConsumeChar(CurPtr, Size, Result); }
|
||||||
|
else if (C == '\\' && tryConsumeIdentifierUCN(CurPtr, Size, Result)) {}
|
||||||
|
else if (!isASCII(C) && tryConsumeIdentifierUTF8Char(CurPtr)) {}
|
||||||
|
else break;
|
||||||
|
}
|
||||||
|
|
||||||
return CurPtr;
|
return CurPtr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -212,6 +212,48 @@ static unsigned ProcessCharEscape(const char *ThisTokBegin,
|
||||||
return ResultChar;
|
return ResultChar;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void appendCodePoint(unsigned Codepoint,
|
||||||
|
llvm::SmallVectorImpl<char> &Str) {
|
||||||
|
char ResultBuf[4];
|
||||||
|
char *ResultPtr = ResultBuf;
|
||||||
|
bool Res = llvm::ConvertCodePointToUTF8(Codepoint, ResultPtr);
|
||||||
|
(void)Res;
|
||||||
|
assert(Res && "Unexpected conversion failure");
|
||||||
|
Str.append(ResultBuf, ResultPtr);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Copy characters from \p Input to \p Buf, expanding any UCNs.
///
/// Every character other than a backslash is appended to \p Buf unchanged.
/// A backslash must introduce a well-formed UCN -- 'u' followed by four hex
/// digits or 'U' followed by eight -- whose code point is appended via
/// appendCodePoint(). Malformed input is a precondition violation checked
/// only by assertions.
void clang::expandUCNs(SmallVectorImpl<char> &Buf, StringRef Input) {
  for (StringRef::iterator I = Input.begin(), E = Input.end(); I != E; ++I) {
    if (*I != '\\') {
      Buf.push_back(*I);
      continue;
    }

    // Step onto the 'u' / 'U' that must follow the backslash.
    ++I;
    assert(I != E && "backslash at end of input");
    assert(*I == 'u' || *I == 'U');

    const unsigned NumHexDigits = (*I == 'u') ? 4 : 8;

    // I is still on the 'u'/'U', so the digits occupy I+1 .. I+NumHexDigits
    // and the last of them must lie strictly before E.
    assert(static_cast<size_t>(E - I) > NumHexDigits &&
           "truncated UCN");

    uint32_t CodePoint = 0;
    ++I;
    for (unsigned Remaining = NumHexDigits; Remaining != 0; ++I, --Remaining) {
      unsigned Value = llvm::hexDigitValue(*I);
      assert(Value != -1U);

      CodePoint <<= 4;
      CodePoint += Value;
    }

    appendCodePoint(CodePoint, Buf);
    // I is now one past the last digit; back up so the outer loop's ++I
    // resumes at the character following the UCN.
    --I;
  }
}
|
||||||
|
|
||||||
/// ProcessUCNEscape - Read the Universal Character Name, check constraints and
|
/// ProcessUCNEscape - Read the Universal Character Name, check constraints and
|
||||||
/// return the UTF32.
|
/// return the UTF32.
|
||||||
static bool ProcessUCNEscape(const char *ThisTokBegin, const char *&ThisTokBuf,
|
static bool ProcessUCNEscape(const char *ThisTokBegin, const char *&ThisTokBuf,
|
||||||
|
@ -625,8 +667,9 @@ NumericLiteralParser::NumericLiteralParser(StringRef TokSpelling,
|
||||||
}
|
}
|
||||||
|
|
||||||
if (s != ThisTokEnd) {
|
if (s != ThisTokEnd) {
|
||||||
if (isValidUDSuffix(PP.getLangOpts(),
|
// FIXME: Don't bother expanding UCNs if !tok.hasUCN().
|
||||||
StringRef(SuffixBegin, ThisTokEnd - SuffixBegin))) {
|
expandUCNs(UDSuffixBuf, StringRef(SuffixBegin, ThisTokEnd - SuffixBegin));
|
||||||
|
if (isValidUDSuffix(PP.getLangOpts(), UDSuffixBuf)) {
|
||||||
// Any suffix pieces we might have parsed are actually part of the
|
// Any suffix pieces we might have parsed are actually part of the
|
||||||
// ud-suffix.
|
// ud-suffix.
|
||||||
isLong = false;
|
isLong = false;
|
||||||
|
@ -992,7 +1035,8 @@ CharLiteralParser::CharLiteralParser(const char *begin, const char *end,
|
||||||
do {
|
do {
|
||||||
--end;
|
--end;
|
||||||
} while (end[-1] != '\'');
|
} while (end[-1] != '\'');
|
||||||
UDSuffixBuf.assign(end, UDSuffixEnd);
|
// FIXME: Don't bother with this if !tok.hasUCN().
|
||||||
|
expandUCNs(UDSuffixBuf, StringRef(end, UDSuffixEnd - end));
|
||||||
UDSuffixOffset = end - TokBegin;
|
UDSuffixOffset = end - TokBegin;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1311,23 +1355,34 @@ void StringLiteralParser::init(const Token *StringToks, unsigned NumStringToks){
|
||||||
StringRef UDSuffix(ThisTokEnd, UDSuffixEnd - ThisTokEnd);
|
StringRef UDSuffix(ThisTokEnd, UDSuffixEnd - ThisTokEnd);
|
||||||
|
|
||||||
if (UDSuffixBuf.empty()) {
|
if (UDSuffixBuf.empty()) {
|
||||||
UDSuffixBuf.assign(UDSuffix);
|
if (StringToks[i].hasUCN())
|
||||||
|
expandUCNs(UDSuffixBuf, UDSuffix);
|
||||||
|
else
|
||||||
|
UDSuffixBuf.assign(UDSuffix);
|
||||||
UDSuffixToken = i;
|
UDSuffixToken = i;
|
||||||
UDSuffixOffset = ThisTokEnd - ThisTokBuf;
|
UDSuffixOffset = ThisTokEnd - ThisTokBuf;
|
||||||
UDSuffixTokLoc = StringToks[i].getLocation();
|
UDSuffixTokLoc = StringToks[i].getLocation();
|
||||||
} else if (!UDSuffixBuf.equals(UDSuffix)) {
|
} else {
|
||||||
|
SmallString<32> ExpandedUDSuffix;
|
||||||
|
if (StringToks[i].hasUCN()) {
|
||||||
|
expandUCNs(ExpandedUDSuffix, UDSuffix);
|
||||||
|
UDSuffix = ExpandedUDSuffix;
|
||||||
|
}
|
||||||
|
|
||||||
// C++11 [lex.ext]p8: At the end of phase 6, if a string literal is the
|
// C++11 [lex.ext]p8: At the end of phase 6, if a string literal is the
|
||||||
// result of a concatenation involving at least one user-defined-string-
|
// result of a concatenation involving at least one user-defined-string-
|
||||||
// literal, all the participating user-defined-string-literals shall
|
// literal, all the participating user-defined-string-literals shall
|
||||||
// have the same ud-suffix.
|
// have the same ud-suffix.
|
||||||
if (Diags) {
|
if (!UDSuffixBuf.equals(UDSuffix)) {
|
||||||
SourceLocation TokLoc = StringToks[i].getLocation();
|
if (Diags) {
|
||||||
Diags->Report(TokLoc, diag::err_string_concat_mixed_suffix)
|
SourceLocation TokLoc = StringToks[i].getLocation();
|
||||||
<< UDSuffixBuf << UDSuffix
|
Diags->Report(TokLoc, diag::err_string_concat_mixed_suffix)
|
||||||
<< SourceRange(UDSuffixTokLoc, UDSuffixTokLoc)
|
<< UDSuffixBuf << UDSuffix
|
||||||
<< SourceRange(TokLoc, TokLoc);
|
<< SourceRange(UDSuffixTokLoc, UDSuffixTokLoc)
|
||||||
|
<< SourceRange(TokLoc, TokLoc);
|
||||||
|
}
|
||||||
|
hadError = true;
|
||||||
}
|
}
|
||||||
hadError = true;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -503,48 +503,6 @@ void Preprocessor::EndSourceFile() {
|
||||||
// Lexer Event Handling.
|
// Lexer Event Handling.
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
static void appendCodePoint(unsigned Codepoint,
|
|
||||||
llvm::SmallVectorImpl<char> &Str) {
|
|
||||||
char ResultBuf[4];
|
|
||||||
char *ResultPtr = ResultBuf;
|
|
||||||
bool Res = llvm::ConvertCodePointToUTF8(Codepoint, ResultPtr);
|
|
||||||
(void)Res;
|
|
||||||
assert(Res && "Unexpected conversion failure");
|
|
||||||
Str.append(ResultBuf, ResultPtr);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void expandUCNs(SmallVectorImpl<char> &Buf, StringRef Input) {
|
|
||||||
for (StringRef::iterator I = Input.begin(), E = Input.end(); I != E; ++I) {
|
|
||||||
if (*I != '\\') {
|
|
||||||
Buf.push_back(*I);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
++I;
|
|
||||||
assert(*I == 'u' || *I == 'U');
|
|
||||||
|
|
||||||
unsigned NumHexDigits;
|
|
||||||
if (*I == 'u')
|
|
||||||
NumHexDigits = 4;
|
|
||||||
else
|
|
||||||
NumHexDigits = 8;
|
|
||||||
|
|
||||||
assert(I + NumHexDigits <= E);
|
|
||||||
|
|
||||||
uint32_t CodePoint = 0;
|
|
||||||
for (++I; NumHexDigits != 0; ++I, --NumHexDigits) {
|
|
||||||
unsigned Value = llvm::hexDigitValue(*I);
|
|
||||||
assert(Value != -1U);
|
|
||||||
|
|
||||||
CodePoint <<= 4;
|
|
||||||
CodePoint += Value;
|
|
||||||
}
|
|
||||||
|
|
||||||
appendCodePoint(CodePoint, Buf);
|
|
||||||
--I;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// LookUpIdentifierInfo - Given a tok::raw_identifier token, look up the
|
/// LookUpIdentifierInfo - Given a tok::raw_identifier token, look up the
|
||||||
/// identifier information for the token and install it into the token,
|
/// identifier information for the token and install it into the token,
|
||||||
/// updating the token kind accordingly.
|
/// updating the token kind accordingly.
|
||||||
|
|
|
@ -111,3 +111,35 @@ void operator "" ""
|
||||||
U"" // expected-error {{cannot have an encoding prefix}}
|
U"" // expected-error {{cannot have an encoding prefix}}
|
||||||
"" _also_not_char(const char *);
|
"" _also_not_char(const char *);
|
||||||
void operator "" u8"" "\u0123" "hello"_all_of_the_things ""(const char*); // expected-error {{must be '""'}}
|
void operator "" u8"" "\u0123" "hello"_all_of_the_things ""(const char*); // expected-error {{must be '""'}}
|
||||||
|
|
||||||
|
// Make sure we treat UCNs and UTF-8 as equivalent.
|
||||||
|
int operator""_µs(unsigned long long) {} // expected-note {{previous}}
|
||||||
|
int hundred_µs = 50_µs + 50_\u00b5s;
|
||||||
|
int operator""_\u00b5s(unsigned long long) {} // expected-error {{redefinition of 'operator "" _µs'}}
|
||||||
|
|
||||||
|
int operator""_\U0000212B(long double) {} // expected-note {{previous}}
|
||||||
|
int hundred_Å = 50.0_Å + 50._\U0000212B;
|
||||||
|
int operator""_Å(long double) {} // expected-error {{redefinition of 'operator "" _Å'}}
|
||||||
|
|
||||||
|
int operator""_𐀀(char) {} // expected-note {{previous}}
|
||||||
|
int 𐀀 = '4'_𐀀 + '2'_\U00010000;
|
||||||
|
int operator""_\U00010000(char) {} // expected-error {{redefinition of 'operator "" _𐀀'}}
|
||||||
|
|
||||||
|
// These all declare the same function.
|
||||||
|
int operator""_℮""_\u212e""_\U0000212e""(const char*, size_t);
|
||||||
|
int operator""_\u212e""_\U0000212e""_℮""(const char*, size_t);
|
||||||
|
int operator""_\U0000212e""_℮""_\u212e""(const char*, size_t);
|
||||||
|
int mix_ucn_utf8 = ""_℮""_\u212e""_\U0000212e"";
|
||||||
|
|
||||||
|
void operator""_℮""_ℯ(unsigned long long) {} // expected-error {{differing user-defined suffixes ('_℮' and '_ℯ') in string literal concatenation}}
|
||||||
|
void operator""_℮""_\u212f(unsigned long long) {} // expected-error {{differing user-defined suffixes ('_℮' and '_ℯ') in string literal concatenation}}
|
||||||
|
void operator""_\u212e""_ℯ(unsigned long long) {} // expected-error {{differing user-defined suffixes ('_℮' and '_ℯ') in string literal concatenation}}
|
||||||
|
void operator""_\u212e""_\u212f(unsigned long long) {} // expected-error {{differing user-defined suffixes ('_℮' and '_ℯ') in string literal concatenation}}
|
||||||
|
|
||||||
|
void operator""_℮""_℮(unsigned long long) {} // expected-note {{previous}}
|
||||||
|
void operator""_\u212e""_\u212e(unsigned long long) {} // expected-error {{redefinition}}
|
||||||
|
|
||||||
|
#define ¢ *0.01 // expected-error {{macro names must be identifiers}}
|
||||||
|
constexpr int operator""_¢(long double d) { return d * 100; } // expected-error {{non-ASCII}}
|
||||||
|
constexpr int operator""_¢(unsigned long long n) { return n; } // expected-error {{non-ASCII}}
|
||||||
|
static_assert(0.02_¢ == 2_¢, ""); // expected-error 2{{non-ASCII}}
|
||||||
|
|
Loading…
Reference in New Issue