Add some support for Unicode string constants

in wide strings. radar 8360841.

llvm-svn: 112672
This commit is contained in:
Fariborz Jahanian 2010-08-31 23:34:27 +00:00
parent a93bb5b807
commit abaae2b692
2 changed files with 24 additions and 3 deletions

View File

@ -170,6 +170,7 @@ static unsigned ProcessCharEscape(const char *&ThisTokBuf,
static void ProcessUCNEscape(const char *&ThisTokBuf, const char *ThisTokEnd, static void ProcessUCNEscape(const char *&ThisTokBuf, const char *ThisTokEnd,
char *&ResultBuf, bool &HadError, char *&ResultBuf, bool &HadError,
SourceLocation Loc, Preprocessor &PP, SourceLocation Loc, Preprocessor &PP,
bool wide,
bool Complain) { bool Complain) {
// FIXME: Add a warning - UCN's are only valid in C++ & C99. // FIXME: Add a warning - UCN's are only valid in C++ & C99.
// FIXME: Handle wide strings. // FIXME: Handle wide strings.
@ -190,6 +191,7 @@ static void ProcessUCNEscape(const char *&ThisTokBuf, const char *ThisTokEnd,
UTF32 UcnVal = 0; UTF32 UcnVal = 0;
unsigned short UcnLen = (ThisTokBuf[-1] == 'u' ? 4 : 8); unsigned short UcnLen = (ThisTokBuf[-1] == 'u' ? 4 : 8);
unsigned short UcnLenSave = UcnLen;
for (; ThisTokBuf != ThisTokEnd && UcnLen; ++ThisTokBuf, UcnLen--) { for (; ThisTokBuf != ThisTokEnd && UcnLen; ++ThisTokBuf, UcnLen--) {
int CharVal = HexDigitValue(ThisTokBuf[0]); int CharVal = HexDigitValue(ThisTokBuf[0]);
if (CharVal == -1) break; if (CharVal == -1) break;
@ -214,6 +216,16 @@ static void ProcessUCNEscape(const char *&ThisTokBuf, const char *ThisTokEnd,
HadError = 1; HadError = 1;
return; return;
} }
if (wide) {
assert(UcnLenSave == 4 &&
"ProcessUCNEscape - only ucn length of 4 supported");
// little endian assumed.
*ResultBuf++ = (UcnVal & 0x000000FF);
*ResultBuf++ = (UcnVal & 0x0000FF00) >> 8;
*ResultBuf++ = (UcnVal & 0x00FF0000) >> 16;
*ResultBuf++ = (UcnVal & 0xFF000000) >> 24;
return;
}
// Now that we've parsed/checked the UCN, we convert from UTF32->UTF8. // Now that we've parsed/checked the UCN, we convert from UTF32->UTF8.
// The conversion below was inspired by: // The conversion below was inspired by:
// http://www.unicode.org/Public/PROGRAMS/CVTUTF/ConvertUTF.c // http://www.unicode.org/Public/PROGRAMS/CVTUTF/ConvertUTF.c
@ -830,12 +842,14 @@ StringLiteralParser(const Token *StringToks, unsigned NumStringToks,
} }
const char *ThisTokEnd = ThisTokBuf+ThisTokLen-1; // Skip end quote. const char *ThisTokEnd = ThisTokBuf+ThisTokLen-1; // Skip end quote.
bool wide = false;
// TODO: Input character set mapping support. // TODO: Input character set mapping support.
// Skip L marker for wide strings. // Skip L marker for wide strings.
if (ThisTokBuf[0] == 'L') if (ThisTokBuf[0] == 'L') {
wide = true;
++ThisTokBuf; ++ThisTokBuf;
}
assert(ThisTokBuf[0] == '"' && "Expected quote, lexer broken?"); assert(ThisTokBuf[0] == '"' && "Expected quote, lexer broken?");
++ThisTokBuf; ++ThisTokBuf;
@ -880,7 +894,8 @@ StringLiteralParser(const Token *StringToks, unsigned NumStringToks,
// Is this a Universal Character Name escape? // Is this a Universal Character Name escape?
if (ThisTokBuf[1] == 'u' || ThisTokBuf[1] == 'U') { if (ThisTokBuf[1] == 'u' || ThisTokBuf[1] == 'U') {
ProcessUCNEscape(ThisTokBuf, ThisTokEnd, ResultPtr, ProcessUCNEscape(ThisTokBuf, ThisTokEnd, ResultPtr,
hadError, StringToks[i].getLocation(), PP, Complain); hadError, StringToks[i].getLocation(), PP, wide,
Complain);
continue; continue;
} }
// Otherwise, this is a non-UCN escape character. Process it. // Otherwise, this is a non-UCN escape character. Process it.

View File

@ -0,0 +1,6 @@
// RUN: %clang_cc1 -triple x86_64-apple-darwin10 -emit-llvm -o - %s | FileCheck %s
// rdar://8360841
// Regression test for a universal character name inside a wide string
// literal. The expected IR below shows code point U+2722 emitted as the
// little-endian bytes 22 27 00 00 (byte 0x27 is printed as the ASCII
// character '), followed by four zero bytes -- presumably the wide null
// terminator -- for 8 bytes in total.
wchar_t s[] = L"\u2722";
// CHECK: @s = global [8 x i8] c"\22'\00\00\00\00\00\00"