forked from OSchip/llvm-project
Add some support for Unicode (UCN escape) string constants
in wide string literals. radar 8360841. llvm-svn: 112672
This commit is contained in:
parent
a93bb5b807
commit
abaae2b692
|
@ -170,6 +170,7 @@ static unsigned ProcessCharEscape(const char *&ThisTokBuf,
|
|||
static void ProcessUCNEscape(const char *&ThisTokBuf, const char *ThisTokEnd,
|
||||
char *&ResultBuf, bool &HadError,
|
||||
SourceLocation Loc, Preprocessor &PP,
|
||||
bool wide,
|
||||
bool Complain) {
|
||||
// FIXME: Add a warning - UCNs are only valid in C++ & C99.
|
||||
// FIXME: Handle wide strings.
|
||||
|
@ -190,6 +191,7 @@ static void ProcessUCNEscape(const char *&ThisTokBuf, const char *ThisTokEnd,
|
|||
|
||||
UTF32 UcnVal = 0;
|
||||
unsigned short UcnLen = (ThisTokBuf[-1] == 'u' ? 4 : 8);
|
||||
unsigned short UcnLenSave = UcnLen;
|
||||
for (; ThisTokBuf != ThisTokEnd && UcnLen; ++ThisTokBuf, UcnLen--) {
|
||||
int CharVal = HexDigitValue(ThisTokBuf[0]);
|
||||
if (CharVal == -1) break;
|
||||
|
@ -214,6 +216,16 @@ static void ProcessUCNEscape(const char *&ThisTokBuf, const char *ThisTokEnd,
|
|||
HadError = 1;
|
||||
return;
|
||||
}
|
||||
if (wide) {
|
||||
assert(UcnLenSave == 4 &&
|
||||
"ProcessUCNEscape - only ucn length of 4 supported");
|
||||
// Emit UcnVal as four bytes, least-significant byte first (little-endian target assumed).
|
||||
*ResultBuf++ = (UcnVal & 0x000000FF);
|
||||
*ResultBuf++ = (UcnVal & 0x0000FF00) >> 8;
|
||||
*ResultBuf++ = (UcnVal & 0x00FF0000) >> 16;
|
||||
*ResultBuf++ = (UcnVal & 0xFF000000) >> 24;
|
||||
return;
|
||||
}
|
||||
// Now that we've parsed/checked the UCN, we convert from UTF32->UTF8.
|
||||
// The conversion below was inspired by:
|
||||
// http://www.unicode.org/Public/PROGRAMS/CVTUTF/ConvertUTF.c
|
||||
|
@ -830,12 +842,14 @@ StringLiteralParser(const Token *StringToks, unsigned NumStringToks,
|
|||
}
|
||||
|
||||
const char *ThisTokEnd = ThisTokBuf+ThisTokLen-1; // Skip end quote.
|
||||
|
||||
bool wide = false;
|
||||
// TODO: Input character set mapping support.
|
||||
|
||||
// Skip the L marker for wide strings and remember that this literal is wide.
|
||||
if (ThisTokBuf[0] == 'L')
|
||||
if (ThisTokBuf[0] == 'L') {
|
||||
wide = true;
|
||||
++ThisTokBuf;
|
||||
}
|
||||
|
||||
assert(ThisTokBuf[0] == '"' && "Expected quote, lexer broken?");
|
||||
++ThisTokBuf;
|
||||
|
@ -880,7 +894,8 @@ StringLiteralParser(const Token *StringToks, unsigned NumStringToks,
|
|||
// Is this a Universal Character Name escape?
|
||||
if (ThisTokBuf[1] == 'u' || ThisTokBuf[1] == 'U') {
|
||||
ProcessUCNEscape(ThisTokBuf, ThisTokEnd, ResultPtr,
|
||||
hadError, StringToks[i].getLocation(), PP, Complain);
|
||||
hadError, StringToks[i].getLocation(), PP, wide,
|
||||
Complain);
|
||||
continue;
|
||||
}
|
||||
// Otherwise, this is a non-UCN escape character. Process it.
|
||||
|
|
|
@ -0,0 +1,6 @@
|
|||
// RUN: %clang_cc1 -triple x86_64-apple-darwin10 -emit-llvm -o - %s | FileCheck %s
|
||||
// rdar://8360841
|
||||
|
||||
// Wide literal holding the single UCN U+2722. The CHECK line expects 8 bytes:
// 0x22 0x27 0x00 0x00 (the code point, least-significant byte first) and then
// four zero bytes for the wide NUL terminator.
wchar_t s[] = L"\u2722";
|
||||
|
||||
// CHECK: @s = global [8 x i8] c"\22'\00\00\00\00\00\00"
|
Loading…
Reference in New Issue