forked from OSchip/llvm-project
Add some support for Unicode string constants
in wide strings. radar 8360841. llvm-svn: 112672
This commit is contained in:
parent
a93bb5b807
commit
abaae2b692
|
@ -170,6 +170,7 @@ static unsigned ProcessCharEscape(const char *&ThisTokBuf,
|
||||||
static void ProcessUCNEscape(const char *&ThisTokBuf, const char *ThisTokEnd,
|
static void ProcessUCNEscape(const char *&ThisTokBuf, const char *ThisTokEnd,
|
||||||
char *&ResultBuf, bool &HadError,
|
char *&ResultBuf, bool &HadError,
|
||||||
SourceLocation Loc, Preprocessor &PP,
|
SourceLocation Loc, Preprocessor &PP,
|
||||||
|
bool wide,
|
||||||
bool Complain) {
|
bool Complain) {
|
||||||
// FIXME: Add a warning - UCN's are only valid in C++ & C99.
|
// FIXME: Add a warning - UCN's are only valid in C++ & C99.
|
||||||
// FIXME: Handle wide strings.
|
// FIXME: Handle wide strings.
|
||||||
|
@ -190,6 +191,7 @@ static void ProcessUCNEscape(const char *&ThisTokBuf, const char *ThisTokEnd,
|
||||||
|
|
||||||
UTF32 UcnVal = 0;
|
UTF32 UcnVal = 0;
|
||||||
unsigned short UcnLen = (ThisTokBuf[-1] == 'u' ? 4 : 8);
|
unsigned short UcnLen = (ThisTokBuf[-1] == 'u' ? 4 : 8);
|
||||||
|
unsigned short UcnLenSave = UcnLen;
|
||||||
for (; ThisTokBuf != ThisTokEnd && UcnLen; ++ThisTokBuf, UcnLen--) {
|
for (; ThisTokBuf != ThisTokEnd && UcnLen; ++ThisTokBuf, UcnLen--) {
|
||||||
int CharVal = HexDigitValue(ThisTokBuf[0]);
|
int CharVal = HexDigitValue(ThisTokBuf[0]);
|
||||||
if (CharVal == -1) break;
|
if (CharVal == -1) break;
|
||||||
|
@ -214,6 +216,16 @@ static void ProcessUCNEscape(const char *&ThisTokBuf, const char *ThisTokEnd,
|
||||||
HadError = 1;
|
HadError = 1;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
if (wide) {
|
||||||
|
assert(UcnLenSave == 4 &&
|
||||||
|
"ProcessUCNEscape - only ucn length of 4 supported");
|
||||||
|
// little endian assumed.
|
||||||
|
*ResultBuf++ = (UcnVal & 0x000000FF);
|
||||||
|
*ResultBuf++ = (UcnVal & 0x0000FF00) >> 8;
|
||||||
|
*ResultBuf++ = (UcnVal & 0x00FF0000) >> 16;
|
||||||
|
*ResultBuf++ = (UcnVal & 0xFF000000) >> 24;
|
||||||
|
return;
|
||||||
|
}
|
||||||
// Now that we've parsed/checked the UCN, we convert from UTF32->UTF8.
|
// Now that we've parsed/checked the UCN, we convert from UTF32->UTF8.
|
||||||
// The conversion below was inspired by:
|
// The conversion below was inspired by:
|
||||||
// http://www.unicode.org/Public/PROGRAMS/CVTUTF/ConvertUTF.c
|
// http://www.unicode.org/Public/PROGRAMS/CVTUTF/ConvertUTF.c
|
||||||
|
@ -830,12 +842,14 @@ StringLiteralParser(const Token *StringToks, unsigned NumStringToks,
|
||||||
}
|
}
|
||||||
|
|
||||||
const char *ThisTokEnd = ThisTokBuf+ThisTokLen-1; // Skip end quote.
|
const char *ThisTokEnd = ThisTokBuf+ThisTokLen-1; // Skip end quote.
|
||||||
|
bool wide = false;
|
||||||
// TODO: Input character set mapping support.
|
// TODO: Input character set mapping support.
|
||||||
|
|
||||||
// Skip L marker for wide strings.
|
// Skip L marker for wide strings.
|
||||||
if (ThisTokBuf[0] == 'L')
|
if (ThisTokBuf[0] == 'L') {
|
||||||
|
wide = true;
|
||||||
++ThisTokBuf;
|
++ThisTokBuf;
|
||||||
|
}
|
||||||
|
|
||||||
assert(ThisTokBuf[0] == '"' && "Expected quote, lexer broken?");
|
assert(ThisTokBuf[0] == '"' && "Expected quote, lexer broken?");
|
||||||
++ThisTokBuf;
|
++ThisTokBuf;
|
||||||
|
@ -880,7 +894,8 @@ StringLiteralParser(const Token *StringToks, unsigned NumStringToks,
|
||||||
// Is this a Universal Character Name escape?
|
// Is this a Universal Character Name escape?
|
||||||
if (ThisTokBuf[1] == 'u' || ThisTokBuf[1] == 'U') {
|
if (ThisTokBuf[1] == 'u' || ThisTokBuf[1] == 'U') {
|
||||||
ProcessUCNEscape(ThisTokBuf, ThisTokEnd, ResultPtr,
|
ProcessUCNEscape(ThisTokBuf, ThisTokEnd, ResultPtr,
|
||||||
hadError, StringToks[i].getLocation(), PP, Complain);
|
hadError, StringToks[i].getLocation(), PP, wide,
|
||||||
|
Complain);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
// Otherwise, this is a non-UCN escape character. Process it.
|
// Otherwise, this is a non-UCN escape character. Process it.
|
||||||
|
|
|
@ -0,0 +1,6 @@
|
||||||
|
// RUN: %clang_cc1 -triple x86_64-apple-darwin10 -emit-llvm -o - %s | FileCheck %s
|
||||||
|
// rdar://8360841
|
||||||
|
|
||||||
|
// Wide string holding the single universal character name U+2722. The FileCheck
// line below expects it emitted as the bytes 0x22 0x27 0x00 0x00 (low byte
// first) followed by a four-byte zero terminator, i.e. an [8 x i8] global.
// NOTE(review): wchar_t is not declared in this file; confirm it is available
// as a built-in type in the language mode this test is compiled in.
wchar_t s[] = L"\u2722";
|
||||||
|
|
||||||
|
// CHECK: @s = global [8 x i8] c"\22'\00\00\00\00\00\00"
|
Loading…
Reference in New Issue